In [0]:
# Mount roots for the two storage layers referenced in this notebook.
silver_root = "/mnt/silver"
gold_root = "/mnt/gold"

# Silver layer: locations of the fact table and its dimension tables.
fact_sale_path = f"{silver_root}/fact_sale"
dim_month_path = f"{silver_root}/dim_month"
dim_flavor_path = f"{silver_root}/dim_flavor"
dim_package_path = f"{silver_root}/dim_package"
dim_channel_group_path = f"{silver_root}/dim_channel_group"

# Gold layer: locations where the aggregate tables are written.
agg_sales_by_region_trade_group_path = f"{gold_root}/agg_sales_by_region_trade_group"
agg_sales_by_brand_month_path = f"{gold_root}/agg_sales_by_brand_month"
agg_lowest_brand_by_region_path = f"{gold_root}/agg_lowest_brand_by_region"


In [0]:
# Top 3 trade groups for each region by sales ($ volume).
#
# Sales are summed per (region, trade group), ranked within each region from
# highest to lowest total, and the three best-ranked rows per region are kept.
# The join to dim_channel_group is an inner join, so fact rows whose
# channel_id has no match in the dimension do not contribute.
#
# The window ORDER BY carries trade_group_desc as a tie-breaker: ROW_NUMBER()
# assigns arbitrary positions to rows with equal totals, which would make the
# top-3 membership vary between runs whenever two trade groups tie.
#
# NOTE: the trailing ORDER BY does not guarantee the order in which the table
# is later read; queries against the gold table should sort explicitly.
spark.sql(f"""
CREATE OR REPLACE TABLE delta.`{agg_sales_by_region_trade_group_path}`
USING DELTA AS
WITH ranked_trade_groups AS (
    SELECT
        fs.region_desc AS Bitr_Org_LVL_C_Desc,
        dcg.trade_group_desc AS TRADE_GROUP_DESC,
        SUM(fs.sales_volume) AS total_sales_volume,
        ROW_NUMBER() OVER (
            PARTITION BY fs.region_desc
            ORDER BY SUM(fs.sales_volume) DESC, dcg.trade_group_desc ASC
        ) AS sales_rank
    FROM delta.`{fact_sale_path}` fs
        JOIN delta.`{dim_channel_group_path}` dcg ON fs.channel_id = dcg.channel_id
    GROUP BY fs.region_desc, dcg.trade_group_desc
)
SELECT
    Bitr_Org_LVL_C_Desc,
    TRADE_GROUP_DESC,
    total_sales_volume
FROM ranked_trade_groups
WHERE sales_rank <= 3
ORDER BY Bitr_Org_LVL_C_Desc, sales_rank
""")

DataFrame[num_affected_rows: bigint, num_inserted_rows: bigint]

In [0]:
# Sales of each brand per month.
#
# Builds the gold table holding one row per (brand, year, month) with the
# summed sales volume. The brand name is taken from dim_flavor's
# flavor_description and the month name from dim_month; both joins are inner
# joins keyed on the ids stored in the fact table.
brand_month_ctas = f"""
CREATE OR REPLACE TABLE delta.`{agg_sales_by_brand_month_path}`
USING DELTA AS
SELECT 
    df.flavor_description AS BRAND_NM,
    fs.year,
    dm.month_id,
    dm.month_name AS month,
    SUM(fs.sales_volume) AS total_sales
FROM delta.`{fact_sale_path}` fs
    JOIN delta.`{dim_month_path}` dm ON fs.month_id = dm.month_id
    JOIN delta.`{dim_flavor_path}` df ON fs.flavor_id = df.flavor_id
GROUP BY df.flavor_description, fs.year,dm.month_id, dm.month_name
ORDER BY fs.year, dm.month_id, total_sales DESC
"""
spark.sql(brand_month_ctas)

DataFrame[num_affected_rows: bigint, num_inserted_rows: bigint]

In [0]:
# Lowest-selling brand for each region.
#
# Sales are summed per (region, brand), ranked within each region from lowest
# to highest total, and only the lowest-ranked brand per region is kept.
# Brands come from dim_flavor via an inner join, so only brands that have at
# least one fact row in a region can be reported for that region.
#
# The window ORDER BY carries flavor_description as a tie-breaker: ROW_NUMBER()
# picks arbitrarily among rows with equal totals, so without it the reported
# "lowest" brand could change between runs when two brands tie.
#
# NOTE: the trailing ORDER BY does not guarantee the order in which the table
# is later read; queries against the gold table should sort explicitly.
spark.sql(f"""
CREATE OR REPLACE TABLE delta.`{agg_lowest_brand_by_region_path}`
USING DELTA AS
WITH ranked_brands AS (
    SELECT
        fs.region_desc AS Bitr_Org_LVL_C_Desc,
        df.flavor_description AS BRAND_NM,
        SUM(fs.sales_volume) AS total_sales,
        ROW_NUMBER() OVER (
            PARTITION BY fs.region_desc
            ORDER BY SUM(fs.sales_volume) ASC, df.flavor_description ASC
        ) AS sales_rank
    FROM delta.`{fact_sale_path}` fs
        JOIN delta.`{dim_flavor_path}` df ON fs.flavor_id = df.flavor_id
    GROUP BY fs.region_desc, df.flavor_description
)
SELECT
    Bitr_Org_LVL_C_Desc AS region,
    BRAND_NM,
    total_sales
FROM ranked_brands
WHERE sales_rank = 1
ORDER BY Bitr_Org_LVL_C_Desc, BRAND_NM
""")

DataFrame[num_affected_rows: bigint, num_inserted_rows: bigint]

In [0]:
# Preview the top-3 trade groups per region.
# Sort explicitly: without ORDER BY, LIMIT 10 may return any ten rows in any
# order, so the preview would not be reproducible.
spark.sql(f"""
SELECT *
FROM delta.`{agg_sales_by_region_trade_group_path}`
ORDER BY Bitr_Org_LVL_C_Desc, total_sales_volume DESC, TRADE_GROUP_DESC
LIMIT 10
""").show()


+-------------------+----------------+------------------+
|Bitr_Org_LVL_C_Desc|TRADE_GROUP_DESC|total_sales_volume|
+-------------------+----------------+------------------+
|             CANADA|         GROCERY| 168766.8300000001|
|             CANADA|        SERVICES| 83769.16999999998|
|             CANADA|        ACADEMIC|53134.440000000046|
|        GREAT LAKES|         GROCERY| 380355.6299999994|
|        GREAT LAKES|        SERVICES|157815.51999999987|
|        GREAT LAKES|        ACADEMIC|152051.44999999998|
|            MIDWEST|         GROCERY|326538.02999999933|
|            MIDWEST|        SERVICES|129280.47999999998|
|            MIDWEST|        ACADEMIC| 88605.36999999994|
|          NORTHEAST|         GROCERY| 403785.5699999997|
+-------------------+----------------+------------------+



In [0]:
# Preview monthly sales per brand, earliest months first and best-selling
# brand first within each month.
# Sort explicitly: without ORDER BY, LIMIT 10 may return any ten rows in any
# order, so the preview would not be reproducible.
spark.sql(f"""
SELECT *
FROM delta.`{agg_sales_by_brand_month_path}`
ORDER BY year, month_id, total_sales DESC, BRAND_NM
LIMIT 10
""").show()


+-----------+----+--------+--------+------------------+
|   BRAND_NM|year|month_id|   month|       total_sales|
+-----------+----+--------+--------+------------------+
|      LEMON|2006|       1| January|507073.77999999805|
|  RASPBERRY|2006|       1| January| 395317.6199999987|
| STRAWBERRY|2006|       1| January| 315988.6899999993|
|      LEMON|2006|       2|February| 552125.5499999976|
|  RASPBERRY|2006|       2|February| 410339.5199999982|
| STRAWBERRY|2006|       2|February|345956.93999999925|
|      LEMON|2006|       3|   March| 766990.9599999957|
|  RASPBERRY|2006|       3|   March| 588382.7199999978|
| STRAWBERRY|2006|       3|   March| 475823.2999999985|
|      GRAPE|2006|       3|   March|               7.5|
+-----------+----+--------+--------+------------------+



In [0]:
# Preview the lowest-selling brand per region, sorted by region.
# Sort explicitly: without ORDER BY, LIMIT 10 may return any ten rows in any
# order, so the preview would not be reproducible.
spark.sql(f"""
SELECT *
FROM delta.`{agg_lowest_brand_by_region_path}`
ORDER BY region, BRAND_NM
LIMIT 10
""").show()


+-----------+-----------+------------------+
|     region|   BRAND_NM|       total_sales|
+-----------+-----------+------------------+
|     CANADA| STRAWBERRY| 74209.37000000004|
|GREAT LAKES| STRAWBERRY|208918.15999999983|
|    MIDWEST| STRAWBERRY|149246.31999999986|
|  NORTHEAST| STRAWBERRY| 195695.9699999998|
|  SOUTHEAST|  RASPBERRY|         224057.24|
|  SOUTHWEST|      GRAPE|               7.5|
|       WEST| STRAWBERRY| 135077.2199999999|
+-----------+-----------+------------------+

