### **Spark Stream Processing**

In [0]:
# Ingest each source entity's CSV files into its bronze Delta table.
#
# For every entity the cell:
#   1. batch-reads the entity's CSV folder once, only to infer a schema,
#   2. opens a streaming CSV reader over the same folder with that schema,
#   3. appends the stream to `pysparkdbt.bronze.<entity>` using a one-shot
#      trigger, and waits for that run to finish before reporting success.
#
# Relies on the notebook-provided `spark` session.

# Define list of tables to process
entities = ["customers", "drivers", "locations", "payments", "trips", "vehicles"]

try:
    for entity in entities:
        print(f"Started processing entity: {entity}")

        # Same folder is used for schema inference and for the stream source.
        source_path = f"/Volumes/pysparkdbt/source/source_data/{entity}"

        # Read all the files to prepare a schema (streaming file sources
        # need an explicit schema, so it is inferred from a batch read first).
        df_entity = (
            spark.read.format("csv")
            .option("header", True)
            .option("inferSchema", True)
            .load(source_path)
        )

        df_schema = df_entity.schema

        # Reading source stream
        df_source_stream = (
            spark.readStream.format("csv")
            .option("header", True)
            .schema(df_schema)
            .load(source_path)
        )

        # Writing stream to bronze delta table.
        # NOTE(review): trigger(once=True) is reported as deprecated in newer
        # Spark releases in favour of trigger(availableNow=True) -- confirm
        # the runtime version before switching.
        query = (
            df_source_stream.writeStream.format("delta")
            .outputMode("append")
            .option("checkpointLocation", f"/Volumes/pysparkdbt/bronze/checkpoint/{entity}")
            .trigger(once=True)
            .toTable(f"pysparkdbt.bronze.{entity}")
        )

        # toTable() only *starts* the query and returns its handle. Block until
        # the one-shot run completes so that (a) the success message is
        # truthful and (b) a failure inside the stream is raised here and
        # reaches the except clause instead of being lost.
        query.awaitTermination()

        print(f"Sucessfully processed entity: {entity}")

except Exception as e:
    # Processing stops at the first failing entity; report which one and why.
    print(f"Error Occurres while processing entity: {entity}")
    print(f"Error occurred during Stream Processing: {e}")

Started processing entity: customers
Sucessfully processed entity: customers
Started processing entity: drivers
Sucessfully processed entity: drivers
Started processing entity: locations
Sucessfully processed entity: locations
Started processing entity: payments
Sucessfully processed entity: payments
Started processing entity: trips
Sucessfully processed entity: trips
Started processing entity: vehicles
Sucessfully processed entity: vehicles
