In [None]:
from confluent_kafka import Consumer, KafkaError, KafkaException

# Consumer configuration
conf = {
    'bootstrap.servers': 'localhost:9092',  # Redpanda broker address
    'group.id': 'python-consumer-group',   # Consumer group ID
    'auto.offset.reset': 'earliest'        # Start reading from the beginning of the topic
}

# Create a consumer instance
consumer = Consumer(conf)

try:
    # Subscribe inside the try block so the consumer is still closed in the
    # `finally` clause if subscribing itself fails.
    consumer.subscribe(['green-trips'])

    # Poll until the cell is interrupted (KeyboardInterrupt) or a Kafka error occurs.
    while True:
        msg = consumer.poll(timeout=1.0)  # Wait up to 1 second for a message
        if msg is None:
            continue  # Nothing arrived within the timeout

        err = msg.error()
        if err:
            if err.code() == KafkaError._PARTITION_EOF:
                # End of partition event: informational, keep polling
                print(f"Reached end of partition {msg.partition()}")
                continue
            # Any other error is re-raised as an exception
            raise KafkaException(err)

        # A record can carry no payload, in which case value() returns None;
        # decoding unconditionally would raise AttributeError.
        payload = msg.value()
        text = payload.decode('utf-8') if payload is not None else None
        print(f"Consumed message: {text} (Partition: {msg.partition()}, Offset: {msg.offset()})")
except KeyboardInterrupt:
    print("Consumer interrupted.")
finally:
    # Always close the consumer, whatever ended the loop
    consumer.close()

In [None]:
from pyflink.datastream import StreamExecutionEnvironment
from pyflink.datastream.connectors import FlinkKafkaConsumer
from pyflink.datastream.connectors.jdbc import JdbcSink, JdbcConnectionOptions, JdbcExecutionOptions
from pyflink.common.serialization import SimpleStringSchema
from pyflink.common import WatermarkStrategy, Types

In [None]:
# Define the Kafka source
def create_kafka_source():
    """Build the Flink Kafka consumer for the ``green-trips`` topic.

    Returns:
        FlinkKafkaConsumer: a source configured with a ``SimpleStringSchema``
        deserializer, the broker at ``localhost:9092`` and the consumer group
        ``pyflink-consumer-group``.
    """
    # Every record is handed to the job as a plain string.
    string_schema = SimpleStringSchema()

    # Connection properties passed to the Kafka client.
    kafka_props = {
        'bootstrap.servers': 'localhost:9092',
        'group.id': 'pyflink-consumer-group',
    }

    return FlinkKafkaConsumer(
        topics='green-trips',
        deserialization_schema=string_schema,
        properties=kafka_props,
    )

# Define the PostgreSQL sink
def create_postgres_sink():
    """Build the JDBC sink that inserts each record into ``green_trips.trip_data``.

    Connection settings are read from the environment so credentials do not
    have to live in the notebook; each falls back to the local default:

    * ``POSTGRES_JDBC_URL`` -- default ``jdbc:postgresql://localhost:5432/postgres``
    * ``POSTGRES_USER``     -- default ``postgres``
    * ``POSTGRES_PASSWORD`` -- default ``postgres``

    Returns:
        The sink object produced by ``JdbcSink.sink`` for single-string-column rows.
    """
    import os  # function-scope import keeps this cell self-contained

    connection_options = (
        JdbcConnectionOptions.JdbcConnectionOptionsBuilder()
        .with_url(os.environ.get('POSTGRES_JDBC_URL', 'jdbc:postgresql://localhost:5432/postgres'))
        .with_driver_name('org.postgresql.Driver')
        .with_user_name(os.environ.get('POSTGRES_USER', 'postgres'))
        .with_password(os.environ.get('POSTGRES_PASSWORD', 'postgres'))
        .build()
    )

    execution_options = (
        JdbcExecutionOptions.builder()
        .with_batch_size(100)  # Flush once 100 rows are buffered...
        # ...and also flush on a timer. Without an interval, a partial batch
        # (< 100 rows) is only written on checkpoint or close, so on a slow or
        # idle stream the trailing rows may never reach PostgreSQL.
        .with_batch_interval_ms(1000)
        .build()
    )

    return JdbcSink.sink(
        sql="INSERT INTO green_trips (trip_data) VALUES (?)",
        type_info=Types.ROW([Types.STRING()]),  # One string column per row
        jdbc_connection_options=connection_options,
        jdbc_execution_options=execution_options,
    )

# Main function
def main():
    """Run the streaming job that copies ``green-trips`` messages into PostgreSQL.

    Steps: create the execution environment and register the connector jars,
    read the topic from the earliest offset, wrap each string message in a
    single-field ``Row``, and write the rows through the JDBC sink. The call
    to ``env.execute`` submits the job under the name
    "Redpanda to PostgreSQL Job".
    """
    # `Row` is used by the map step below but is not among the names imported
    # in the import cell; without this the map function fails with NameError.
    from pyflink.common import Row

    # Set up the execution environment
    env = StreamExecutionEnvironment.get_execution_environment()
    # NOTE(review): these paths must match the jar files actually present under
    # /opt/flink/lib -- confirm the file names for the installed Flink version.
    env.add_jars(
        "file:///opt/flink/lib/flink-connector-kafka_2.12-1.16.0.jar",
        "file:///opt/flink/lib/flink-connector-jdbc_2.12-1.16.0.jar",
        "file:///opt/flink/lib/postgresql-42.6.0.jar"
    )

    # Create the Kafka source and start from the earliest offset
    kafka_source = create_kafka_source()
    kafka_source.set_start_from_earliest()

    # Add the Kafka source to the environment
    stream = env.add_source(kafka_source)

    # The sink is declared with a one-column ROW type, so wrap each string
    # message in a Row before handing it over.
    transformed_stream = stream.map(
        lambda message: Row(message),
        output_type=Types.ROW([Types.STRING()])
    )

    # Add the PostgreSQL sink
    transformed_stream.add_sink(create_postgres_sink())

    # Execute the job
    env.execute("Redpanda to PostgreSQL Job")

# Entry point: start the job when this code runs as the main module.
if __name__ == "__main__":
    main()