In [0]:
from pyspark.sql import functions as F
import dlt

@dlt.view
def raw_feed():
    """Stream the raw JSON feed files and project them onto common columns.

    Files under ``s3://bookstorerawdata/feed_batch/`` are read as a stream
    through the ``cloudFiles`` source in JSON format.

    Returns:
        A streaming DataFrame with the columns ``topic``, ``key`` and
        ``value`` (both cast to string), ``create_ts``, ``source_file``
        and ``insert_ts``.
    """
    feed = (
        spark.readStream
        .format('cloudFiles')
        .option('cloudFiles.format', 'json')
        .load('s3://bookstorerawdata/feed_batch/')
    )

    # `timestamp` is divided by 1000 before the cast, so it is presumably
    # epoch milliseconds -- TODO confirm against the producer.
    create_ts = (F.col('timestamp') / 1000).cast('timestamp').alias('create_ts')
    # NOTE(review): input_file_name() may be unavailable on some Databricks
    # compute configurations; `_metadata.file_path` is the usual
    # replacement -- confirm before switching.
    source_file = F.input_file_name().alias('source_file')
    # Time at which the row was processed by this pipeline.
    insert_ts = F.current_timestamp().alias('insert_ts')

    return feed.select(
        F.col('topic'),
        F.col('key').cast('string'),
        F.col('value').cast('string'),
        create_ts,
        source_file,
        insert_ts,
    )

@dlt.table(name='bookstore.bronze.books')
def books():
    """Bronze table of book records parsed from the raw feed.

    Keeps the ``raw_feed`` rows whose ``topic`` is ``'books'``, parses the
    JSON text in ``value`` with a fixed schema and flattens the parsed
    struct next to the feed metadata columns.

    Returns:
        A streaming DataFrame with ``key``, ``create_ts``, ``source_file``,
        ``insert_ts`` followed by the parsed fields ``book_id``, ``title``,
        ``author``, ``price`` and ``updated``.
    """
    book_schema = 'book_id STRING, title STRING, author STRING, price DOUBLE, updated TIMESTAMP'
    return (
        # raw_feed is defined on spark.readStream, i.e. it is a streaming
        # view, so it has to be consumed with a streaming read rather than
        # the batch-style dlt.read().
        dlt.read_stream('raw_feed')
        .filter(F.col('topic') == 'books')
        .withColumn('v', F.from_json(F.col('value'), book_schema))
        .select('key', 'create_ts', 'source_file', 'insert_ts', 'v.*')
    )

@dlt.table(name='bookstore.bronze.customers')
def customers():
    """Bronze table of customer records parsed from the raw feed.

    Keeps the ``raw_feed`` rows whose ``topic`` is ``'customers'``, parses
    the JSON text in ``value`` with a fixed schema and flattens the parsed
    struct next to the feed metadata columns.

    Returns:
        A streaming DataFrame with ``key``, ``create_ts``, ``source_file``,
        ``insert_ts`` followed by the parsed customer fields
        (``customer_id``, ``email``, ``first_name``, ``last_name``,
        ``gender``, ``city``, ``country_code``, ``row_status``,
        ``row_time``).
    """
    customer_schema = (
        'customer_id STRING, email STRING, first_name STRING, last_name STRING, '
        'gender STRING, city STRING, country_code STRING, row_status STRING, '
        'row_time timestamp'
    )
    return (
        # raw_feed is defined on spark.readStream, i.e. it is a streaming
        # view, so it has to be consumed with a streaming read rather than
        # the batch-style dlt.read().
        dlt.read_stream('raw_feed')
        .filter(F.col('topic') == 'customers')
        .withColumn('v', F.from_json(F.col('value'), customer_schema))
        .select('key', 'create_ts', 'source_file', 'insert_ts', 'v.*')
    )

@dlt.table(name='bookstore.bronze.orders')
def orders():
    """Bronze table of order records parsed from the raw feed.

    Keeps the ``raw_feed`` rows whose ``topic`` is ``'orders'``, parses the
    JSON text in ``value`` with a fixed schema and flattens the parsed
    struct next to the feed metadata columns. The ``books`` field stays a
    nested array of ``(book_id, quantity, subtotal)`` structs.

    Returns:
        A streaming DataFrame with ``key``, ``create_ts``, ``source_file``,
        ``insert_ts`` followed by the parsed order fields.
    """
    # Split across lines for readability only; the pieces concatenate to a
    # single DDL schema string.
    order_schema = (
        'order_id STRING, order_timestamp Timestamp, customer_id STRING, '
        'quantity BIGINT, total BIGINT, '
        'email STRING, first_name STRING, last_name STRING, gender STRING, '
        'street STRING, city STRING, country STRING, '
        'row_time TIMESTAMP, processed_timestamp TIMESTAMP, '
        'books ARRAY<STRUCT<book_id STRING, quantity BIGINT, subtotal BIGINT>>'
    )
    return (
        # raw_feed is defined on spark.readStream, i.e. it is a streaming
        # view, so it has to be consumed with a streaming read rather than
        # the batch-style dlt.read().
        dlt.read_stream('raw_feed')
        .filter(F.col('topic') == 'orders')
        .withColumn('v', F.from_json(F.col('value'), order_schema))
        .select('key', 'create_ts', 'source_file', 'insert_ts', 'v.*')
    )