In [0]:
%sql
-- Preview of de-duplicated bronze rows: for each (invoice, stockcode) pair,
-- keep only the newest record by invoicedate, then by _ingest_timestamp.
WITH ranked_retail AS (
  SELECT
    invoice,
    stockcode,
    customer_id,
    quantity,
    price,
    invoicedate,
    country,
    description,
    -- 1 = newest version of this invoice line.
    ROW_NUMBER() OVER (
      PARTITION BY invoice, stockcode
      ORDER BY invoicedate DESC, _ingest_timestamp DESC
    ) AS row_rank
  FROM lab_2026.bronze_online_retail
  -- Only rows with a positive quantity and a positive price are ranked.
  WHERE quantity > 0.0
    AND price > 0.0
)
SELECT
  invoice,
  stockcode,
  customer_id,
  quantity,
  price,
  invoicedate,
  country,
  description
FROM ranked_retail
WHERE row_rank = 1
-- No ORDER BY: the 10 sampled rows are whatever the engine returns first.
LIMIT 10;

invoice,stockcode,customer_id,quantity,price,invoicedate,country,description
489436,22107,13078.0,4,3.75,2009-12-01 09:06:00,United Kingdom,PIZZA PLATE IN BOX
489437,21989,15362.0,12,0.85,2009-12-01 09:08:00,United Kingdom,PACK OF 20 SKULL PAPER NAPKINS
489437,22130,15362.0,12,0.85,2009-12-01 09:08:00,United Kingdom,PARTY CONE CHRISTMAS DECORATION
489438,21411,18102.0,32,2.5,2009-12-01 09:24:00,United Kingdom,GINGHAM HEART DOORSTOP RED
489438,85183A,18102.0,60,2.4,2009-12-01 09:24:00,United Kingdom,CHARLIE & LOLA WASTEPAPER BIN BLUE
489438,85183B,18102.0,60,2.4,2009-12-01 09:24:00,United Kingdom,CHARLIE & LOLA WASTEPAPER BIN FLORA
489439,21491,12682.0,6,1.95,2009-12-01 09:28:00,France,SET OF THREE VINTAGE GIFT WRAPS
489439,85216,12682.0,12,0.65,2009-12-01 09:28:00,France,ASSORTED CAKES FRIDGE MAGNETS
489440,22349,18087.0,8,3.75,2009-12-01 09:43:00,United Kingdom,"DOG BOWL , CHASING BALL DESIGN"
489442,21582,13635.0,20,1.65,2009-12-01 09:46:00,United Kingdom,KINGS CHOICE SMALL TUBE MATCHES


In [0]:
from pyspark.sql import functions as F, Window as W

# Window used to rank the versions of one invoice line: rows sharing the same
# (invoice, stockcode) are ordered newest-first by invoicedate, with
# _ingest_timestamp as the tie-breaker.
_duplicate_window = W.partitionBy('invoice', 'stockcode').orderBy(
    F.desc('invoicedate'),
    F.desc('_ingest_timestamp'),
)
# Typed, de-duplicated view of the bronze table; this DataFrame is the source
# of the MERGE into lab_2026.silver_online_retail.
online_retail_df = (
    spark.read.table('lab_2026.bronze_online_retail')
    # Same row filter as the %sql version of this query earlier in the
    # notebook: only rows with a positive quantity and a positive price are
    # kept. It is applied before ranking (like the SQL WHERE clause) so a
    # filtered-out row can never win the de-duplication.
    .filter("quantity > 0.0 AND price > 0.0")
    # Cast the bronze columns to the silver table's column types.
    .selectExpr(
        "invoice", "stockcode", "CAST(CAST(customer_id AS NUMERIC) AS BIGINT) customer_id"
        , "CAST(quantity AS BIGINT) quantity", "CAST(price AS DECIMAL(10, 2)) price"
        , "CAST(invoicedate AS TIMESTAMP) invoicedate", "country", "description"
        , '_ingest_timestamp'
    )
    # Keep only the newest row per (invoice, stockcode).
    .withColumn('rw', F.row_number().over(_duplicate_window))
    .filter(F.col('rw') == 1)
    # Helper columns are not part of the silver schema.
    .drop('rw', '_ingest_timestamp')
)

online_retail_df.limit(10).display()

invoice,stockcode,customer_id,quantity,price,invoicedate,country,description
489436,22107,13078,4,3.75,2009-12-01T09:06:00.000Z,United Kingdom,PIZZA PLATE IN BOX
489437,21989,15362,12,0.85,2009-12-01T09:08:00.000Z,United Kingdom,PACK OF 20 SKULL PAPER NAPKINS
489437,22130,15362,12,0.85,2009-12-01T09:08:00.000Z,United Kingdom,PARTY CONE CHRISTMAS DECORATION
489438,21411,18102,32,2.5,2009-12-01T09:24:00.000Z,United Kingdom,GINGHAM HEART DOORSTOP RED
489438,85183A,18102,60,2.4,2009-12-01T09:24:00.000Z,United Kingdom,CHARLIE & LOLA WASTEPAPER BIN BLUE
489438,85183B,18102,60,2.4,2009-12-01T09:24:00.000Z,United Kingdom,CHARLIE & LOLA WASTEPAPER BIN FLORA
489439,21491,12682,6,1.95,2009-12-01T09:28:00.000Z,France,SET OF THREE VINTAGE GIFT WRAPS
489439,85216,12682,12,0.65,2009-12-01T09:28:00.000Z,France,ASSORTED CAKES FRIDGE MAGNETS
489440,22349,18087,8,3.75,2009-12-01T09:43:00.000Z,United Kingdom,"DOG BOWL , CHASING BALL DESIGN"
489442,21582,13635,20,1.65,2009-12-01T09:46:00.000Z,United Kingdom,KINGS CHOICE SMALL TUBE MATCHES


In [0]:
%sql

-- Rebuild the silver table from scratch: any existing table is dropped first,
-- so the CREATE below always yields an empty table.
DROP TABLE IF EXISTS lab_2026.silver_online_retail;

CREATE TABLE IF NOT EXISTS lab_2026.silver_online_retail (
  -- invoice line attributes
  invoice           VARCHAR(256),
  stockcode         VARCHAR(256),
  customer_id       BIGINT,
  quantity          BIGINT,
  price             DECIMAL(10, 2),
  invoicedate       TIMESTAMP,
  country           VARCHAR(256),
  description       STRING,
  -- audit columns
  _insert_timestamp TIMESTAMP,
  _update_timestamp TIMESTAMP
);

In [0]:
%sql

-- Show the column names and data types of the silver table.
DESCRIBE FORMATTED lab_2026.silver_online_retail;

col_name,data_type,comment
invoice,varchar(256),
stockcode,varchar(256),
customer_id,bigint,
quantity,bigint,
price,"decimal(10,2)",
invoicedate,timestamp,
country,varchar(256),
description,string,
_insert_timestamp,timestamp,
_update_timestamp,timestamp,


In [0]:
from delta.tables import DeltaTable

# Upsert the de-duplicated source rows into the silver Delta table.
#   - rows are matched on (invoice, stockcode);
#   - a matched row is updated ONLY when a tracked column actually differs,
#     so re-running the merge on unchanged data neither rewrites rows nor
#     bumps _update_timestamp;
#   - unmatched source rows are inserted with both audit timestamps set.
_merge_keys = ('invoice', 'stockcode')
_tracked_cols = (
    'customer_id', 'quantity', 'price', 'invoicedate', 'country', 'description'
)

# `<=>` is null-safe equality: NULL <=> NULL is true, so a column that is NULL
# on both sides does not count as a change, while NULL vs. a value does.
_row_changed = ' OR '.join(
    f'NOT (src.{col} <=> silver.{col})' for col in _tracked_cols
)

(
    DeltaTable
    .forName(spark, 'lab_2026.silver_online_retail')
    .alias('silver')
    .merge(
        online_retail_df.alias('src')
        , ' AND '.join(f'src.{key} = silver.{key}' for key in _merge_keys)
    )
    .whenMatchedUpdate(
        condition = _row_changed
        , set = {
            **{col: f'src.{col}' for col in _tracked_cols}
            , '_update_timestamp': F.current_timestamp()
        }
    )
    .whenNotMatchedInsert(
        values = {
            **{col: f'src.{col}' for col in (*_merge_keys, *_tracked_cols)}
            , '_insert_timestamp': F.current_timestamp()
            , '_update_timestamp': F.current_timestamp()
        }
    )
    .execute()
)

DataFrame[num_affected_rows: bigint, num_updated_rows: bigint, num_deleted_rows: bigint, num_inserted_rows: bigint]

In [0]:
%sql

select * from lab_2026.silver_online_retail limit 10;

invoice,stockcode,customer_id,quantity,price,invoicedate,country,description,_insert_timestamp,_update_timestamp
489537,20972,14040.0,5,1.25,2009-12-01T12:14:00.000Z,United Kingdom,PINK CREAM FELT CRAFT TRINKET BOX,2026-01-01T19:20:35.743Z,2026-01-02T03:21:01.997Z
489599,22139,12758.0,12,4.95,2009-12-01T14:40:00.000Z,Portugal,RETRO SPOT TEA SET CERAMIC 11 PC,2026-01-01T19:20:35.743Z,2026-01-02T03:21:01.997Z
489607,84519B,,1,6.04,2009-12-01T14:49:00.000Z,United Kingdom,CARROT CHARLIE+LOLA COASTER SET,2026-01-01T19:20:35.743Z,2026-01-02T03:21:01.997Z
489667,84946,16563.0,12,1.25,2009-12-01T18:35:00.000Z,United Kingdom,ANTIQUE SILVER TEA GLASS ETCHED,2026-01-01T19:20:35.743Z,2026-01-02T03:21:01.997Z
489856,20617,,1,4.3,2009-12-02T14:36:00.000Z,United Kingdom,FIRST CLASS PASSPORT COVER,2026-01-01T19:20:35.743Z,2026-01-02T03:21:01.997Z
489856,21641,,1,1.7,2009-12-02T14:36:00.000Z,United Kingdom,ASSORTED TUTTI FRUTTI KEYRING BALL,2026-01-01T19:20:35.743Z,2026-01-02T03:21:01.997Z
489856,90005A,,1,2.16,2009-12-02T14:36:00.000Z,United Kingdom,JADE GREEN ENAMEL HAIR COMB,2026-01-01T19:20:35.743Z,2026-01-02T03:21:01.997Z
489857,21975,,1,1.3,2009-12-02T14:43:00.000Z,United Kingdom,PACK OF 60 DINOSAUR CAKE CASES,2026-01-01T19:20:35.743Z,2026-01-02T03:21:01.997Z
490011,22196,16918.0,2,0.85,2009-12-03T12:27:00.000Z,United Kingdom,SMALL HEART MEASURING SPOONS,2026-01-01T19:20:35.743Z,2026-01-02T03:21:01.997Z
490064,21509,16139.0,12,0.42,2009-12-03T13:51:00.000Z,United Kingdom,COWBOYS AND INDIANS BIRTHDAY CARD,2026-01-01T19:20:35.743Z,2026-01-02T03:21:01.997Z
