Create Gold Schema

In [0]:
%sql
-- Gold-layer schema; every table created in the cells below lives here.
-- IF NOT EXISTS makes the cell safe to re-run.
CREATE SCHEMA IF NOT EXISTS ecommerce_catalog.ecommerce_gold;


Customer Dimension (SCD Type 2)

We track historical changes in customer data (city, country, job title, etc.).

In [0]:
%sql
-- Seed the customer dimension from the silver customers table.
-- IF NOT EXISTS means this only runs on the first execution; an existing
-- dim_customer (and its accumulated history) is left untouched.
-- Every seeded row starts as the current version with an open-ended range.
CREATE TABLE IF NOT EXISTS ecommerce_catalog.ecommerce_gold.dim_customer
USING DELTA
AS
SELECT
    cust.customer_id,
    cust.name,
    cust.email,
    cust.city,
    cust.country,
    cust.gender,
    cust.job_title,
    cust.date_of_birth,
    current_date()     AS effective_from,  -- version is valid from the load date
    CAST(NULL AS DATE) AS effective_to,    -- typed NULL so the column is created as DATE
    TRUE               AS is_current       -- flags the open (latest) version
FROM ecommerce_catalog.ecommerce_silver.customers AS cust;


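Merge Data from Silver (SCD Type 2 Logic)
Code (Spark SQL)
Reads customer data from the silver layer and merges it into dim_customer, closing out the current row of any customer whose city, country, or job title has changed and inserting rows for customers that have no current version.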

In [0]:
%sql
-- SCD Type 2 refresh of dim_customer from the silver customers table.
--
-- The source is staged as two row sets so that a changed customer is both
-- expired AND re-inserted in a single run:
--   1. Every silver customer, keyed by its own customer_id (merge_key).
--      * Matches the current dimension row with changed tracked attributes
--        -> that row is closed out (effective_to / is_current).
--      * Matches nothing -> brand-new customer, first version is inserted.
--   2. Only the customers whose tracked attributes changed, repeated with a
--      NULL merge_key. NULL never satisfies the ON condition, so these rows
--      always fall through to WHEN NOT MATCHED and become the new current
--      version.
--
-- Tracked attributes (city, country, job_title) are compared with the
-- null-safe operator <=> so that a change to or from NULL is detected;
-- a plain <> evaluates to NULL in that case and the change would be ignored.
--
-- Assumes silver.customers holds at most one row per customer_id; MERGE
-- fails if several source rows match the same target row.
MERGE INTO ecommerce_catalog.ecommerce_gold.dim_customer AS tgt
USING (
    -- Row set 1: all customers, matched on their natural key.
    SELECT
        cust.customer_id AS merge_key,
        cust.customer_id,
        cust.name,
        cust.email,
        cust.city,
        cust.country,
        cust.gender,
        cust.job_title,
        cust.date_of_birth
    FROM ecommerce_catalog.ecommerce_silver.customers AS cust

    UNION ALL

    -- Row set 2: changed customers only, forced down the INSERT path.
    SELECT
        NULL AS merge_key,
        cust.customer_id,
        cust.name,
        cust.email,
        cust.city,
        cust.country,
        cust.gender,
        cust.job_title,
        cust.date_of_birth
    FROM ecommerce_catalog.ecommerce_silver.customers AS cust
    INNER JOIN ecommerce_catalog.ecommerce_gold.dim_customer AS cur
        ON cur.customer_id = cust.customer_id
        AND cur.is_current = true
    WHERE NOT (cur.city <=> cust.city)
        OR NOT (cur.country <=> cust.country)
        OR NOT (cur.job_title <=> cust.job_title)
) AS src
ON tgt.customer_id = src.merge_key AND tgt.is_current = true

-- Close out the current version when any tracked attribute changed.
WHEN MATCHED AND (
    NOT (tgt.city <=> src.city) OR
    NOT (tgt.country <=> src.country) OR
    NOT (tgt.job_title <=> src.job_title)
)
THEN UPDATE SET
    tgt.effective_to = current_date() - 1,  -- old version ends the day before the new one starts
    tgt.is_current = false

-- New customers (row set 1) and new versions of changed customers (row set 2).
WHEN NOT MATCHED
THEN INSERT (
    customer_id,
    name,
    email,
    city,
    country,
    gender,
    job_title,
    date_of_birth,
    effective_from,
    effective_to,
    is_current
)
VALUES (
    src.customer_id,
    src.name,
    src.email,
    src.city,
    src.country,
    src.gender,
    src.job_title,
    src.date_of_birth,
    current_date(),
    NULL,
    true
);


Optimize Table with Z-Ordering
Improves performance when dashboards filter by customer_id.


In [0]:
%sql
-- Compact dim_customer and Z-order it by customer_id.
OPTIMIZE ecommerce_catalog.ecommerce_gold.dim_customer
ZORDER BY (customer_id);


Product Dimension
Creates a clean product dimension used for analytics.

In [0]:
%sql
-- Product dimension: a straight projection of the silver products table.
-- CREATE OR REPLACE rebuilds the table from scratch on every run.
CREATE OR REPLACE TABLE ecommerce_catalog.ecommerce_gold.dim_product
USING DELTA
AS
SELECT
    prod.product_id,
    prod.category,
    prod.sub_category,
    prod.color,
    prod.sizes,
    prod.production_cost
FROM ecommerce_catalog.ecommerce_silver.products AS prod;


Optimize Product Dimension with Z-Ordering on product_id

In [0]:
%sql
-- Compact dim_product and Z-order it by product_id.
OPTIMIZE ecommerce_catalog.ecommerce_gold.dim_product
ZORDER BY (product_id);


Date Dimension. Creates a calendar table for time-based analysis.

In [0]:
%sql
-- Date dimension: exactly one row per calendar day that appears in the
-- silver transactions table.
--
-- The transaction date is cast to DATE *before* de-duplicating, so that a
-- column carrying a time-of-day component cannot produce several rows that
-- share the same date_key. Rows without a date are skipped; they would
-- otherwise create an all-NULL dimension row that no fact row can join to.
-- NOTE(review): the type of transactions.date is not visible here; the cast is
-- a no-op if it is already DATE.
CREATE OR REPLACE TABLE ecommerce_catalog.ecommerce_gold.dim_date
USING DELTA
AS
WITH calendar_days AS (
    SELECT DISTINCT
        CAST(txn.date AS DATE) AS full_date
    FROM ecommerce_catalog.ecommerce_silver.transactions AS txn
    WHERE txn.date IS NOT NULL
)
SELECT
    date_format(full_date, 'yyyyMMdd') AS date_key,  -- string key, e.g. '20240131'
    full_date,
    year(full_date)                    AS year,
    month(full_date)                   AS month,
    day(full_date)                     AS day,
    weekofyear(full_date)              AS week,      -- week numbering can disagree with `year` around 1 January
    date_format(full_date, 'EEEE')     AS day_name   -- full weekday name
FROM calendar_days;


Sales Fact Table

Simple explanation:

Central fact table

Joins dimensions using surrogate keys

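One row per invoice line

Note: the cell that builds `fact_sales` does not appear in this section; the KPI cells below assume the table already exists.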

KPI Tables for Dashboards. 

Used directly in a Top Products dashboard.

In [0]:
%sql
-- KPI table: units sold and revenue per product, aggregated from fact_sales.
-- Rebuilt in full on every run.
CREATE OR REPLACE TABLE ecommerce_catalog.ecommerce_gold.top_selling_skus
USING DELTA
AS
SELECT
    fs.product_id,
    SUM(fs.quantity)   AS total_quantity_sold,
    SUM(fs.line_total) AS total_revenue
FROM ecommerce_catalog.ecommerce_gold.fact_sales AS fs
GROUP BY fs.product_id;


Customer Lifetime Value (CLV)

Calculates how much revenue each customer generated over time.

In [0]:
%sql
-- KPI table: total revenue and number of distinct invoices per customer,
-- aggregated from fact_sales. Rebuilt in full on every run.
CREATE OR REPLACE TABLE ecommerce_catalog.ecommerce_gold.customer_lifetime_value
USING DELTA
AS
SELECT
    fs.customer_id,                              -- grouped on customer_id, not customer_key
    SUM(fs.line_total)            AS lifetime_value,
    COUNT(DISTINCT fs.invoice_id) AS total_orders
FROM ecommerce_catalog.ecommerce_gold.fact_sales AS fs
GROUP BY fs.customer_id;


dim_store


In [0]:
%sql
-- Store dimension: a straight projection of the silver stores table,
-- rebuilt in full on every run.
CREATE OR REPLACE TABLE ecommerce_catalog.ecommerce_gold.dim_store
USING DELTA
AS
SELECT
    st.store_id,
    st.country,
    st.city,
    st.store_name,
    st.number_of_employees,
    st.zip_code,
    st.latitude,
    st.longitude
FROM ecommerce_catalog.ecommerce_silver.stores AS st;

-- Compact the freshly built table and Z-order it by store_id.
OPTIMIZE ecommerce_catalog.ecommerce_gold.dim_store
ZORDER BY (store_id);


In [0]:
%sql
-- Employee dimension: a straight projection of the silver employees table,
-- rebuilt in full on every run. store_id is carried along as-is.
CREATE OR REPLACE TABLE ecommerce_catalog.ecommerce_gold.dim_employee
USING DELTA
AS
SELECT
    emp.employee_id,
    emp.store_id,
    emp.name,
    emp.position
FROM ecommerce_catalog.ecommerce_silver.employees AS emp;

-- Compact the freshly built table and Z-order it by employee_id.
OPTIMIZE ecommerce_catalog.ecommerce_gold.dim_employee
ZORDER BY (employee_id);


In [0]:
%sql
-- Low-stock alert table built from fact_sales.
-- There is no inventory source here: stock is approximated as a hard-coded
-- opening stock of 100 units per product minus the units sold (example value).
-- Only products with fewer than 20 units remaining are kept.
CREATE OR REPLACE TABLE ecommerce_catalog.ecommerce_gold.inventory_alerts
USING DELTA
AS
WITH product_stock AS (
    SELECT
        fs.product_id,
        SUM(fs.quantity)       AS total_sold,
        100 - SUM(fs.quantity) AS stock_remaining  -- 100 = assumed opening stock
    FROM ecommerce_catalog.ecommerce_gold.fact_sales AS fs
    GROUP BY fs.product_id
)
SELECT
    product_id,
    total_sold,
    stock_remaining
FROM product_stock
WHERE stock_remaining < 20;  -- low-stock threshold


In [0]:
%sql
-- List every table that now exists in the gold schema.
SHOW TABLES IN ecommerce_catalog.ecommerce_gold;

Dimension Table Customer  

In [0]:
%sql
-- Ad-hoc preview: any 10 rows of the customer dimension (no ordering applied).
SELECT dc.*
FROM ecommerce_catalog.ecommerce_gold.dim_customer AS dc
LIMIT 10;

Dimension Table Date

In [0]:
%sql
-- Ad-hoc preview: any 10 rows of the date dimension (no ordering applied).
SELECT dd.*
FROM ecommerce_catalog.ecommerce_gold.dim_date AS dd
LIMIT 10;

Dimension Table Employee

In [0]:
%sql
-- Ad-hoc preview: any 10 rows of the employee dimension (no ordering applied).
SELECT de.*
FROM ecommerce_catalog.ecommerce_gold.dim_employee AS de
LIMIT 10;

Dimension Table Product


In [0]:
%sql
-- Ad-hoc preview: any 10 rows of the product dimension (no ordering applied).
SELECT dp.*
FROM ecommerce_catalog.ecommerce_gold.dim_product AS dp
LIMIT 10;

Dimension Table Store


In [0]:
%sql
-- Ad-hoc preview: any 10 rows of the store dimension (no ordering applied).
SELECT ds.*
FROM ecommerce_catalog.ecommerce_gold.dim_store AS ds
LIMIT 10;

Top-Selling Products and Their Revenue

In [0]:
%sql
-- The 10 best-selling products by units sold, with their revenue.
-- Without an ORDER BY, LIMIT 10 returns an arbitrary 10 rows rather than the
-- top sellers; revenue and product_id break ties so the result is stable.
SELECT
    product_id,
    total_quantity_sold,
    total_revenue
FROM ecommerce_catalog.ecommerce_gold.top_selling_skus
ORDER BY
    total_quantity_sold DESC,
    total_revenue DESC,
    product_id
LIMIT 10;


Customer Lifetime Value

In [0]:
%sql
-- Ad-hoc preview: any 10 rows of the customer lifetime value table (no ordering applied).
SELECT clv.*
FROM ecommerce_catalog.ecommerce_gold.customer_lifetime_value AS clv
LIMIT 10;

Inventory Alert

In [0]:
%sql
-- Ad-hoc preview: any 10 rows of the inventory alerts table (no ordering applied).
SELECT ia.*
FROM ecommerce_catalog.ecommerce_gold.inventory_alerts AS ia
LIMIT 10;


Fact Table for Sales


In [0]:
%sql
-- Ad-hoc preview: any 10 rows of the sales fact table (no ordering applied).
SELECT fs.*
FROM ecommerce_catalog.ecommerce_gold.fact_sales AS fs
LIMIT 10;