In [0]:
%run ../../config/variables

In [0]:
# Source tables, read below from the silver schema.
ORIGIN_TABLE_SALES = "slv_sales"
ORIGIN_TABLE_CUSTOMERS = "slv_customers"
ORIGIN_TABLE_EMPLOYEES = "slv_employees"

# Destination table, written below into the gold schema.
TARGET_TABLE = "gld_sales"

# Read Sources

### Load Dimension Tables

In [0]:
# Batch (non-streaming) reads of the two lookup tables. Only the key and the
# display name are kept, and "name" is renamed per table so the two columns
# stay distinguishable after both are joined onto the sales rows.
# NOTE(review): catalog_name / silver_schema_name are not defined in this
# notebook; presumably they come from the %run of config/variables - confirm.
df_customers = (
    spark.read
    .table(f"{catalog_name}.{silver_schema_name}.{ORIGIN_TABLE_CUSTOMERS}")
    .select("customer_id", "name")
    .withColumnRenamed("name", "customer")
)

df_employees = (
    spark.read
    .table(f"{catalog_name}.{silver_schema_name}.{ORIGIN_TABLE_EMPLOYEES}")
    .select("employee_id", "name")
    .withColumnRenamed("name", "employee")
)

### Read Streaming

In [0]:
# Value passed to the "maxFilesPerTrigger" read option of the sales stream.
MAX_FILES_PER_TRIGGER = 10

# Streaming read of the silver sales table.
df_sales = (
    spark.readStream
    .format('delta')
    .option("maxFilesPerTrigger", MAX_FILES_PER_TRIGGER)
    .table(f"{catalog_name}.{silver_schema_name}.{ORIGIN_TABLE_SALES}")
)

### Define Final DataFrame

In [0]:
from pyspark.sql.functions import col

# Attach the customer and employee names to every sales row.
# Both joins are inner joins, so a sales row whose customer_id or employee_id
# has no match in the corresponding lookup table is dropped from the result.
sales_with_names = (
    df_sales
    .join(df_customers, on="customer_id", how="inner")
    .join(df_employees, on="employee_id", how="inner")
)

# Output columns, in output order. event_date is cast to a DATE; every other
# column is passed through unchanged.
gold_columns = [
    "order_id",
    "employee",
    "customer",
    "latitude",
    "longitude",
    "district",
    "quantity_products",
    "partition_date",
    "event_hour",
    "event_day",
    "event_month",
    "event_year",
    col("event_date").cast("date"),
]

df = sales_with_names.select(*gold_columns)

# Write Streaming

In [0]:
# Append the enriched sales rows to the gold table and block until the run ends.
# trigger(availableNow=True) processes the data available when the query starts
# and then stops, so awaitTermination() returns instead of blocking indefinitely.
# NOTE(review): gold_checkpoint_path / gold_schema_name are not defined in this
# notebook; presumably they come from the %run of config/variables - confirm.
query = (
    df.writeStream
    .format("delta")
    .outputMode("append")
    .option("checkpointLocation", gold_checkpoint_path)
    .trigger(availableNow=True)
    # toTable() is the method the PySpark DataStreamWriter API documents for
    # starting a streaming query that writes into a named table.
    .toTable(f"{catalog_name}.{gold_schema_name}.{TARGET_TABLE}")
)

# Keep the handle in `query` so its status/progress can still be inspected
# after the run; a failure inside the stream is raised from this call.
query.awaitTermination()