In [3]:
# version with filter for two tables + caching

from pyspark.sql.functions import to_date, col

# Read the checkpoint row that records how far tbl_bronze has been processed.
ctrl = spark.table("cdf_control_silver_part").filter("table_name = 'tbl_bronze'").first()
if ctrl is None:
    # first() returns None when no row matches; fail with a clear message
    # instead of an opaque "'NoneType' object is not subscriptable".
    raise ValueError(
        "cdf_control_silver_part has no row for table_name = 'tbl_bronze'"
    )
last_version = ctrl["last_version"]     # e.g. 946

# Find the newest committed version of tbl_bronze from its Delta history.
current_version = spark.sql("DESCRIBE HISTORY tbl_bronze")\
                       .selectExpr("max(version) as v")\
                       .first()["v"]

if current_version > last_version:

    # table_changes() treats both version bounds as inclusive.
    # - Start one past last_version: the control table is advanced to the
    #   version that was processed, so starting at last_version would re-read
    #   that commit and append its rows a second time.
    # - Stop at current_version: commits that land while this cell runs are
    #   left for the next run, since the checkpoint only advances that far.
    # NOTE(review): assumes last_version means "last version already
    # processed" -- confirm against how the control row was seeded.
    start_version = last_version + 1

    # Fetch the inserted rows in the version window.
    changes_df = spark.sql(f"""
    SELECT 
            timestamp,
            symbol,
            exchange,
            event_type,
            latency_ms,
            order_id,
            transaction_type,
            price,
            volume,
            bid_price,
            ask_price,
            bid_size,
            ask_size,
            canceled_order_id,
            currency,
            trade_id,
            event_id,
            EventProcessedUtcTime,
            PartitionId,
            EventEnqueuedUtcTime   
    FROM table_changes('tbl_bronze', {start_version}, {current_version})
    WHERE _change_type = 'insert'
    """)

    # Add partition_date and cache the result so both writes below reuse it.
    with_date = changes_df \
    .withColumn("partition_date", to_date(col("timestamp"))) \
    .cache()

    try:
        # Split the cached rows by currency.
        usd_df = with_date.filter(col("currency") == "USD")
        # A NULL currency makes both `== "USD"` and `!= "USD"` evaluate to
        # NULL, so such rows would be dropped by both filters. Route them to
        # the "wrong currency" table explicitly so no change row is lost.
        wrong_df = with_date.filter(
            col("currency").isNull() | (col("currency") != "USD")
        )

        # Append each subset to its target table.
        # NOTE: the two appends and the checkpoint update are not atomic; a
        # failure after the first append leaves the checkpoint unchanged, so a
        # rerun would append those USD rows again.
        usd_df.write \
            .format("delta") \
            .mode("append") \
            .saveAsTable("tbl_silver_part")

        wrong_df.write \
            .format("delta") \
            .mode("append") \
            .saveAsTable("tbl_silver_wrong_currency")
    finally:
        # Release the cached data whether or not the writes succeeded.
        with_date.unpersist()

    # Advance the checkpoint only after both writes completed.
    spark.sql(f"""
    UPDATE cdf_control_silver_part
    SET last_version = {current_version}
    WHERE table_name = 'tbl_bronze'
    """)

else:
    print("Brak nowych zmian do przetworzenia.")


StatementMeta(, 2f7629e2-8f8a-4935-8cce-3c344bb56654, 5, Finished, Available, Finished)