In [25]:
orders = spark.sql("SELECT * FROM lh_silver.orders")

order_items = spark.sql("SELECT * FROM lh_silver.order_items")

promotions = spark.sql("SELECT * FROM lh_silver.promotions")

profiles = spark.sql("SELECT * FROM lh_silver.profiles")

StatementMeta(, bc9d5e76-11bc-45f8-98e8-c6961d93bbc5, 27, Finished, Available, Finished)

In [26]:
from datetime import datetime
from notebookutils import mssparkutils as notebookutils
import json

table_name = "gold_sales"
source_snapshot_ts = None
rows_in = None
rows_out = None
status = "Failed"
now = datetime.now()

StatementMeta(, bc9d5e76-11bc-45f8-98e8-c6961d93bbc5, 28, Finished, Available, Finished)

In [27]:
promotions.createOrReplaceTempView("promotions_temp")
order_items.createOrReplaceTempView("order_items")

orders_with_promotions = spark.sql("""
SELECT
    oi.order_id,
    oi.steamid64,
    oi.appid,
    oi.paidamount,
    oi.date,
    pt.promotion_id,
    pt.name, 
    COALESCE(ROUND(pt.percentage / 100 , 3), 0) AS discount_percentage ,   
    pt.start_date,
    pt.end_date
    FROM order_items oi
LEFT JOIN promotions_temp pt
    ON oi.appid = pt.game_id
   AND to_date(oi.date) BETWEEN pt.start_date AND pt.end_date
""")

StatementMeta(, bc9d5e76-11bc-45f8-98e8-c6961d93bbc5, 29, Finished, Available, Finished)

In [28]:
rows_in = order_items.count()

StatementMeta(, bc9d5e76-11bc-45f8-98e8-c6961d93bbc5, 30, Finished, Available, Finished)

In [29]:
orders_with_promotions.count()

StatementMeta(, bc9d5e76-11bc-45f8-98e8-c6961d93bbc5, 31, Finished, Available, Finished)

8269226

In [30]:
orders.createOrReplaceTempView("orders_temp")
profiles.createOrReplaceTempView("customer_temp")

taxes_by_nation = spark.sql("""
    SELECT nationality, tax_percent
    FROM (
        SELECT 
            nationality,
            tax_percent,
            RANK() OVER (PARTITION BY nationality ORDER BY counts DESC) AS ranks
        FROM (
            SELECT 
                c.nationality,
                ROUND(o.taxes / o.amount * 100, 2) AS tax_percent,
                COUNT(*) AS counts
            FROM orders_temp o
            left JOIN customer_temp c ON o.steamid64 = c.steamid64
            WHERE o.taxes > 0 AND o.amount > 0
            GROUP BY c.nationality, ROUND(o.taxes / o.amount * 100, 2)
        )
    )
    WHERE ranks = 1
""")

StatementMeta(, bc9d5e76-11bc-45f8-98e8-c6961d93bbc5, 32, Finished, Available, Finished)

In [31]:
taxes_by_nation.createOrReplaceTempView("taxes_by_nation")

unified_orders = spark.sql("""
    SELECT 
        o.order_id,
        o.steamid64,
        o.order_date,
        o.card_number,
        o.order_status_id,
        o.purchase_type_id,
        c.nationality,
        ROUND(COALESCE(t.tax_percent / 100.0, o.taxes/ o.amount, 0), 4) AS tax_percent
    FROM orders_temp o
    left JOIN customer_temp c ON o.steamid64 = c.steamid64
    left JOIN taxes_by_nation t ON c.nationality = t.nationality
""")

StatementMeta(, bc9d5e76-11bc-45f8-98e8-c6961d93bbc5, 33, Finished, Available, Finished)

In [32]:
unified_orders.printSchema()

StatementMeta(, bc9d5e76-11bc-45f8-98e8-c6961d93bbc5, 34, Finished, Available, Finished)

root
 |-- order_id: string (nullable = true)
 |-- steamid64: long (nullable = true)
 |-- order_date: date (nullable = true)
 |-- card_number: long (nullable = true)
 |-- order_status_id: long (nullable = true)
 |-- purchase_type_id: long (nullable = true)
 |-- nationality: string (nullable = true)
 |-- tax_percent: double (nullable = true)



In [33]:
orders_with_promotions.printSchema()

StatementMeta(, bc9d5e76-11bc-45f8-98e8-c6961d93bbc5, 35, Finished, Available, Finished)

root
 |-- order_id: string (nullable = true)
 |-- steamid64: long (nullable = true)
 |-- appid: long (nullable = true)
 |-- paidamount: double (nullable = true)
 |-- date: date (nullable = true)
 |-- promotion_id: string (nullable = true)
 |-- name: string (nullable = true)
 |-- discount_percentage: double (nullable = false)
 |-- start_date: date (nullable = true)
 |-- end_date: date (nullable = true)



In [34]:
unified_orders.where('tax_percent is NULL').count()

StatementMeta(, bc9d5e76-11bc-45f8-98e8-c6961d93bbc5, 36, Finished, Available, Finished)

0

In [35]:
unified_orders.createOrReplaceTempView("unified_orders")
orders_with_promotions.createOrReplaceTempView("orders_with_promotions")


sales_fact = spark.sql("""
    SELECT
        COALESCE(u.order_id,  p.order_id)  AS order_id,
        COALESCE(u.steamid64, p.steamid64) AS steamid64,
        p.appid AS game_id,
        CAST(DATE_FORMAT(COALESCE(u.order_date, p.date), 'yyyyMMdd') AS INTEGER) AS order_date_id,
        u.card_number,
        p.paidamount as price,
        u.tax_percent,
        round(u.tax_percent * p.paidamount, 2) as taxes,
        u.order_status_id,
        u.purchase_type_id,
        u.nationality,


        
        p.date      ,
        p.promotion_id,
        p.discount_percentage     AS promotion_precentage,
        (1 - COALESCE(p.discount_percentage, 0) / 100.0)                         AS discount_multiplier,
        round((1 - COALESCE(p.discount_percentage, 0) / 100.0)  * p.paidamount, 2) as discount,   

        ROUND(p.paidamount * (1 - COALESCE(p.discount_percentage, 0) / 100.0) ,2)                   AS subtotal,

        ROUND(p.paidamount * (1 - COALESCE(p.discount_percentage, 0) / 100.0) + (COALESCE(u.tax_percent, 0) * p.paidamount * (1 - COALESCE(p.discount_percentage, 0) / 100.0)), 2) as total_price


    FROM
        unified_orders          u
        RIGHT JOIN         
        orders_with_promotions  p
        ON  u.order_id  = p.order_id
        AND u.steamid64 = p.steamid64
        AND u.order_date = p.date
""")


StatementMeta(, bc9d5e76-11bc-45f8-98e8-c6961d93bbc5, 37, Finished, Available, Finished)

In [36]:
sales_fact.count()

StatementMeta(, bc9d5e76-11bc-45f8-98e8-c6961d93bbc5, 38, Finished, Available, Finished)

8269226

In [37]:
orders_order_status = spark.sql("SELECT * FROM lh_silver.orders_order_status")

StatementMeta(, bc9d5e76-11bc-45f8-98e8-c6961d93bbc5, 39, Finished, Available, Finished)

In [38]:
orders_purchase_type = spark.sql("SELECT * FROM lh_silver.orders_purchase_type")

StatementMeta(, bc9d5e76-11bc-45f8-98e8-c6961d93bbc5, 40, Finished, Available, Finished)

In [39]:
orders_purchase_type.printSchema()

StatementMeta(, bc9d5e76-11bc-45f8-98e8-c6961d93bbc5, 41, Finished, Available, Finished)

root
 |-- id: long (nullable = true)
 |-- purchase_type: string (nullable = true)



In [40]:
orders_order_status.printSchema()

StatementMeta(, bc9d5e76-11bc-45f8-98e8-c6961d93bbc5, 42, Finished, Available, Finished)

root
 |-- id: long (nullable = true)
 |-- order_status: string (nullable = true)



In [41]:
sales_fact_final = (
    sales_fact
    .join(orders_order_status, sales_fact.order_status_id == orders_order_status.id)
    .select(
        *sales_fact.columns,  # Select all columns from sales_fact,
        orders_order_status['order_status']  # Select 'order_status' from orders_order_status

    )
)



StatementMeta(, bc9d5e76-11bc-45f8-98e8-c6961d93bbc5, 43, Finished, Available, Finished)

In [42]:
sales_fact_final_2 = sales_fact_final.join(orders_purchase_type, [orders_purchase_type.id == sales_fact_final.purchase_type_id]).select(
        *sales_fact_final.columns, 
        orders_purchase_type['purchase_type'] 
    )

StatementMeta(, bc9d5e76-11bc-45f8-98e8-c6961d93bbc5, 44, Finished, Available, Finished)

In [43]:
display(sales_fact_final_2)

StatementMeta(, bc9d5e76-11bc-45f8-98e8-c6961d93bbc5, 45, Finished, Available, Finished)

SynapseWidget(Synapse.DataFrame, 555798a4-b2ae-41b1-82b0-7a5ff7a9fa50)

In [44]:
reordered_columns = [
    'order_id', 'steamid64', 'game_id',
    'order_date_id', 'card_number', 'price', 'tax_percent', 'taxes', 
    'nationality', 'promotion_id', 'promotion_precentage', 
    'discount_multiplier', 'discount', 'subtotal', 'total_price', 
    'order_status', 'purchase_type' 
]

sales_fact_final_2_reordered = sales_fact_final_2.select(*reordered_columns)

sales_fact_final_2_reordered.printSchema()

StatementMeta(, bc9d5e76-11bc-45f8-98e8-c6961d93bbc5, 46, Finished, Available, Finished)

root
 |-- order_id: string (nullable = true)
 |-- steamid64: long (nullable = true)
 |-- game_id: long (nullable = true)
 |-- order_date_id: integer (nullable = true)
 |-- card_number: long (nullable = true)
 |-- price: double (nullable = true)
 |-- tax_percent: double (nullable = true)
 |-- taxes: double (nullable = true)
 |-- nationality: string (nullable = true)
 |-- promotion_id: string (nullable = true)
 |-- promotion_precentage: double (nullable = false)
 |-- discount_multiplier: double (nullable = true)
 |-- discount: double (nullable = true)
 |-- subtotal: double (nullable = true)
 |-- total_price: double (nullable = true)
 |-- order_status: string (nullable = true)
 |-- purchase_type: string (nullable = true)



In [45]:
rows_out = sales_fact_final_2_reordered.count()

StatementMeta(, bc9d5e76-11bc-45f8-98e8-c6961d93bbc5, 47, Finished, Available, Finished)

In [46]:
try:

    sales_fact_final_2_reordered.write.mode('overwrite').option("overwriteSchema", "true").format("delta").saveAsTable('sales_fact')
    status = "Succeeded"
except Exception as e:
    print("Write failed:", e)
    status = "Failed"


StatementMeta(, bc9d5e76-11bc-45f8-98e8-c6961d93bbc5, 48, Finished, Available, Finished)

In [47]:
freshness_hours = None
freshness_sla_ok = None
completeness_ratio = None
completeness_sla_ok = None
sla_met = None

if status.lower() == "succeeded":
    if source_snapshot_ts:
        freshness_hours = (now - source_snapshot_ts).total_seconds() / 3600
        freshness_sla_ok = freshness_hours <= 12

    if rows_in and rows_out and rows_in > 0:
        completeness_ratio = rows_out / rows_in
        completeness_sla_ok = completeness_ratio >= 0.98

    if freshness_sla_ok is not None and completeness_sla_ok is not None:
        sla_met = freshness_sla_ok and completeness_sla_ok

StatementMeta(, bc9d5e76-11bc-45f8-98e8-c6961d93bbc5, 49, Finished, Available, Finished)

In [48]:
notebookutils.notebook.exit(json.dumps({
    "table_name": table_name,
    "rows_in": rows_in,
    "rows_out": rows_out,
    "source_snapshot_ts": source_snapshot_ts.strftime("%Y-%m-%d %H:%M:%S") if source_snapshot_ts else None,
    "status": status,
    "sla_met": sla_met
}))

StatementMeta(, bc9d5e76-11bc-45f8-98e8-c6961d93bbc5, 50, Finished, Available, Finished)

ExitValue: {"table_name": "gold_sales", "rows_in": 8269226, "rows_out": 7182534, "source_snapshot_ts": null, "status": "Succeeded", "sla_met": null}