In [0]:
%sql

-- Preview of the daily sales aggregate: total revenue and distinct invoice
-- count per country and invoice date, read from the silver table.
with sor as (
  -- Keep only the columns the aggregate needs; invoicedate is cast to a date
  -- so that all rows of the same day fall into one group.
  select
    country
    , cast(invoicedate as date) as invoice_date
    , price
    , quantity
    , invoice
  from lab_2026.silver_online_retail
)
select
  country
  , invoice_date
  , sum(price * quantity) as total_revenue
  , count(distinct invoice) as total_orders
from sor
group by country, invoice_date
-- Without an explicit ordering the 10 rows returned by limit are arbitrary.
order by country, invoice_date
limit 10

In [0]:
%sql

-- Start from a clean slate: remove any previous version of the gold table.
DROP TABLE IF EXISTS lab_2026.gold_daily_sales;

-- Daily sales aggregate. The two underscore-prefixed columns hold the
-- insert/update timestamps written by the load.
CREATE TABLE IF NOT EXISTS lab_2026.gold_daily_sales (
    country            STRING
  , invoice_date       DATE
  , total_revenue      DECIMAL(38, 2)
  , total_orders       BIGINT
  , _insert_timestamp  TIMESTAMP
  , _update_timestamp  TIMESTAMP
);

In [0]:
%sql

-- Show the column list plus the extended table metadata of the gold table.
DESCRIBE TABLE EXTENDED lab_2026.gold_daily_sales;

In [0]:
from pyspark.sql import functions as F, types as T

# Aggregate expressions: revenue is the sum of price * quantity, orders are
# counted as distinct invoice values.
revenue_sum = F.sum(F.col('price') * F.col('quantity')).alias('total_revenue')
order_count = F.countDistinct(F.col('invoice')).alias('total_orders')

# One row per (country, invoice_date) computed from the silver table.
daily_sales_df = (
    spark.read.table('lab_2026.silver_online_retail')
    .withColumn('invoice_date', F.col('invoicedate').cast(T.DateType()))
    .groupBy('country', 'invoice_date')
    .agg(revenue_sum, order_count)
)

# Stamp both audit columns with the current timestamp expression.
load_ts = F.current_timestamp()
online_retail_df = (
    daily_sales_df
    .withColumn('_insert_timestamp', load_ts)
    .withColumn('_update_timestamp', load_ts)
)

# Quick visual check of the first few aggregated rows.
online_retail_df.limit(5).display()

In [0]:
from delta.tables import DeltaTable

# Upsert the daily aggregates into the gold table, keyed on
# (country, invoice_date).
#
# The keys are compared with the null-safe operator `<=>`. With plain `=`, a
# source row whose country or invoice_date is NULL never matches its existing
# gold row (NULL = NULL is not true), so every re-run of the merge would
# insert that row again as a duplicate.
(
    DeltaTable
    .forName(spark, 'lab_2026.gold_daily_sales')
    .alias('gold')
    .merge(
        online_retail_df.alias('src')
        , 'gold.country <=> src.country AND gold.invoice_date <=> src.invoice_date'
    )
    # Key already present: refresh the measures and the update timestamp;
    # _insert_timestamp is deliberately left out so it keeps its first value.
    .whenMatchedUpdate(
        set = {
            'total_revenue': 'src.total_revenue'
            , 'total_orders': 'src.total_orders'
            , '_update_timestamp': 'src._update_timestamp'
        }
    )
    # Key not present yet: insert all source columns.
    .whenNotMatchedInsertAll()
    .execute()
)

In [0]:
%sql

-- Spot-check the gold table after the merge. Columns are listed explicitly
-- and the rows are ordered so the 10-row sample is the same on every run.
select
  country
  , invoice_date
  , total_revenue
  , total_orders
  , _insert_timestamp
  , _update_timestamp
from lab_2026.gold_daily_sales
order by country, invoice_date
limit 10;