### Importing the Needed Modules

In [0]:
# Spark SQL functions and type classes. The wildcard imports place every public
# name from these modules into the notebook namespace.
from pyspark.sql.functions import *
from pyspark.sql.types import *

# Extend the module search path so the project's `src` package can be imported.
# NOTE(review): this is a hardcoded, user-specific workspace path — consider
# deriving it from configuration so the notebook runs for other users.
import sys
sys.path.append('/Workspace/Users/mohammedthoufiq9360@gmail.com/Retail-And-Ecommerce-Analytics-Platform')

# Project-local names: source/target table identifiers, the dimension's schema
# reference, and the helper used to attach gold-layer metadata columns.
from src.paths import SILVER_DISCOUNTS_PATH, DIM_DISCOUNTS_PATH
from src.schema_definitions import DIM_DISCOUNTS_SCHEMA
from src.utils import add_gold_metadata
# Delta Lake table API, used for the MERGE into the dimension table.
from delta.tables import DeltaTable

### Querying Silver Discounts Table

In [0]:
# Load the silver discounts table; this DataFrame is the input for the
# dimension build in the cells that follow.
disc_silver_df = spark.table(SILVER_DISCOUNTS_PATH)

# Render only a five-row sample so the cell output stays small.
disc_silver_preview_df = disc_silver_df.limit(5)
disc_silver_preview_df.display()

discount_start_date,discount_end_date,discount,description,category,sub_category,ingestion_ts,_source_file
2020-01-01,2020-01-10,0.4,40% discount during our New Year Winter Sale,Feminine,Coats and Blazers,2026-01-14T05:41:44.554Z,dbfs:/Volumes/retail_analytics/raw/kaggle/global_fashion/discounts.csv
2020-01-01,2020-01-10,0.4,40% discount during our New Year Winter Sale,Feminine,Sweaters and Knitwear,2026-01-14T05:41:44.554Z,dbfs:/Volumes/retail_analytics/raw/kaggle/global_fashion/discounts.csv
2020-01-01,2020-01-10,0.4,40% discount during our New Year Winter Sale,Masculine,Coats and Blazers,2026-01-14T05:41:44.554Z,dbfs:/Volumes/retail_analytics/raw/kaggle/global_fashion/discounts.csv
2020-01-01,2020-01-10,0.4,40% discount during our New Year Winter Sale,Masculine,Sweaters and Sweatshirts,2026-01-14T05:41:44.554Z,dbfs:/Volumes/retail_analytics/raw/kaggle/global_fashion/discounts.csv
2020-01-01,2020-01-10,0.4,40% discount during our New Year Winter Sale,Children,Coats,2026-01-14T05:41:44.554Z,dbfs:/Volumes/retail_analytics/raw/kaggle/global_fashion/discounts.csv


### Dim_discounts Schema Reference

In [0]:
# Display the DIM_DISCOUNTS_SCHEMA mapping (column name -> type string) imported
# from src.schema_definitions, as a reference for the cells below.
DIM_DISCOUNTS_SCHEMA

{'discount_sk': 'long',
 'discount_start_date': 'date',
 'discount_end_date': 'date',
 'discount': 'double',
 'description': 'string',
 'category': 'string',
 'sub_category': 'string',
 '_created_at': 'timestamp',
 '_updated_at': 'timestamp'}

### Selecting the Needed Columns for `dim_discounts`

In [0]:
# Columns carried from silver into the dimension. The silver lineage columns
# (ingestion_ts, _source_file) are intentionally not selected.
dim_discount_columns = [
    "discount_start_date",
    "discount_end_date",
    "discount",
    "description",
    "category",
    "sub_category",
]

disc_silver_df = disc_silver_df.select(*dim_discount_columns)

### Creating metadata columns: `_created_at` and `_updated_at`

In [0]:
# Build the merge source from the projected silver columns.
# NOTE(review): add_gold_metadata lives in src.utils and is not visible here;
# the MERGE below reads src._created_at and src._updated_at, so it presumably
# adds those two timestamp columns — confirm against the helper.
dim_disc_df = add_gold_metadata(disc_silver_df)

### Creating Dim_discounts Table with surrogate key

In [0]:
# DDL for the dimension table. IF NOT EXISTS keeps this cell safe to re-run,
# and discount_sk is an identity column, so Delta assigns the surrogate key and
# the MERGE never supplies it.
create_dim_discounts_ddl = f"""
CREATE TABLE IF NOT EXISTS {DIM_DISCOUNTS_PATH} (
    discount_sk BIGINT GENERATED ALWAYS AS IDENTITY,
    discount_start_date DATE,
    discount_end_date DATE,
    discount DOUBLE,
    description STRING,
    category STRING,
    sub_category STRING,
    _created_at TIMESTAMP,
    _updated_at TIMESTAMP
)
USING DELTA
"""

spark.sql(create_dim_discounts_ddl)

DataFrame[]

### Updating the Dim_discounts Table

In [0]:
# Upsert the prepared discounts into the dimension table.
dim_disc_tbl = DeltaTable.forName(spark, DIM_DISCOUNTS_PATH)

# Natural key of a discount row. The null-safe operator `<=>` is used instead
# of `=` so that a NULL in any key column still matches the same NULL in the
# target; with plain `=` such rows never match and would be re-inserted as
# duplicates on every run.
merge_condition = """
    tgt.discount_start_date <=> src.discount_start_date AND
    tgt.discount_end_date   <=> src.discount_end_date   AND
    tgt.discount            <=> src.discount            AND
    tgt.category            <=> src.category            AND
    tgt.sub_category        <=> src.sub_category
"""

# Only rewrite a matched row when its one non-key attribute actually changed,
# so re-running the notebook on unchanged data does not bump _updated_at.
description_changed = "NOT (tgt.description <=> src.description)"

(
    dim_disc_tbl.alias("tgt")
    .merge(dim_disc_df.alias("src"), merge_condition)
    .whenMatchedUpdate(
        condition=description_changed,
        set={
            "tgt.description": "src.description",
            "tgt._updated_at": "src._updated_at",
        },
    )
    # discount_sk is omitted on purpose: it is generated by the identity column.
    .whenNotMatchedInsert(values={
        "tgt.discount_start_date": "src.discount_start_date",
        "tgt.discount_end_date": "src.discount_end_date",
        "tgt.discount": "src.discount",
        "tgt.description": "src.description",
        "tgt.category": "src.category",
        "tgt.sub_category": "src.sub_category",
        "tgt._created_at": "src._created_at",
        "tgt._updated_at": "src._updated_at",
    })
    .execute()
)


DataFrame[num_affected_rows: bigint, num_updated_rows: bigint, num_deleted_rows: bigint, num_inserted_rows: bigint]

In [0]:
# Read the dimension table back and show a five-row sample to confirm the
# merge wrote rows and surrogate keys were assigned.
dim_disc_preview_df = spark.table(DIM_DISCOUNTS_PATH).limit(5)
dim_disc_preview_df.display()

discount_sk,discount_start_date,discount_end_date,discount,description,category,sub_category,_created_at,_updated_at
1,2020-01-01,2020-01-10,0.4,40% discount during our New Year Winter Sale,Feminine,Coats and Blazers,2026-01-17T13:45:37.180Z,2026-01-17T13:45:37.180Z
2,2020-01-01,2020-01-10,0.4,40% discount during our New Year Winter Sale,Feminine,Sweaters and Knitwear,2026-01-17T13:45:37.180Z,2026-01-17T13:45:37.180Z
3,2020-01-01,2020-01-10,0.4,40% discount during our New Year Winter Sale,Masculine,Coats and Blazers,2026-01-17T13:45:37.180Z,2026-01-17T13:45:37.180Z
4,2020-01-01,2020-01-10,0.4,40% discount during our New Year Winter Sale,Masculine,Sweaters and Sweatshirts,2026-01-17T13:45:37.180Z,2026-01-17T13:45:37.180Z
5,2020-01-01,2020-01-10,0.4,40% discount during our New Year Winter Sale,Children,Coats,2026-01-17T13:45:37.180Z,2026-01-17T13:45:37.180Z


In [0]:
# Total number of rows currently in the dimension table (shown as cell output).
spark.table(DIM_DISCOUNTS_PATH).count()

181