# Création de la session Spark

In [None]:
from pyspark.sql import SparkSession

# -----------------------------------------------------
# 1. Start the Spark session
# -----------------------------------------------------
# getOrCreate() returns the already-running session when there is one; this
# cell itself sets no catalog or storage options.
# NOTE(review): the status message mentions Iceberg and MinIO, which are
# presumably configured outside this cell (e.g. cluster defaults) — confirm.
spark = SparkSession.builder.appName("Generate Gold").getOrCreate()
spark.sparkContext.setLogLevel("ERROR")  # only ERROR-level Spark logs are emitted
print("Spark session initialisée avec Iceberg et MinIO.")

# Create the gold namespace; IF NOT EXISTS makes re-running the cell harmless.
spark.sql("CREATE NAMESPACE IF NOT EXISTS retail.gold").show()
print("Namespace Iceberg créée : gold")

#  🪙 Gold : `agg_daily_sales_by_store`  
## Agrégation quotidienne par magasin — KPI opérationnels  
## → Dashboard magasin / performance quotidienne

In [None]:

# Build the gold-layer daily KPI table: one row per
# (sale_date, store_id, store_name, city) aggregated from
# retail.silver.clean_sales_enriched. CREATE OR REPLACE rebuilds the table
# from scratch on every run.
spark.sql("""
CREATE OR REPLACE TABLE retail.gold.agg_daily_sales_by_store
COMMENT 'KPI journaliers par magasin — couche Gold'
AS
SELECT
  sale_date,
  store_id,
  store_name,
  city,
  COUNT(*) AS n_transactions,
  COUNT(DISTINCT product_id) AS n_products_sold,
  SUM(quantity) AS units_sold,
  SUM(revenue) AS total_revenue,
  SUM(gross_profit) AS total_gross_profit,
  -- NULLIF turns a zero revenue total into NULL so the ratio is NULL
  -- instead of a division by zero.
  SUM(gross_profit) / NULLIF(SUM(revenue), 0) AS gross_margin_pct,
  -- Mean revenue per source row.
  -- NOTE(review): this is a basket size only if one row is one basket; confirm.
  AVG(revenue) AS avg_basket_size,
  COUNT(DISTINCT salesperson) AS n_salespeople,

  -- Advanced metrics
  -- Share of revenue coming from discounted rows.
  SUM(CASE WHEN is_discounted THEN revenue ELSE 0 END) / NULLIF(SUM(revenue), 0) AS discount_revenue_share,
  -- Number of rows flagged as sold at a loss.
  SUM(CASE WHEN margin_status = 'Perte' THEN 1 ELSE 0 END) AS n_loss_transactions

FROM retail.silver.clean_sales_enriched
GROUP BY 1, 2, 3, 4
""")

print("✅ Table Gold créée : retail.gold.agg_daily_sales_by_store")
# Preview the three most recent rows as a quick sanity check.
spark.sql("SELECT * FROM retail.gold.agg_daily_sales_by_store ORDER BY sale_date DESC LIMIT 3").show()

#  🪙 Gold : `product_performance_mtd`  
## Performance produit *Month-To-Date* — pour pilotage assortiment  
## → Dashboard merchandising / pricing

In [None]:

# Build the gold-layer month-to-date product performance table.
#
# Sales from the first day of the current month onward are aggregated per
# product, left-joined to the average stock taken from retail.raw.inventory,
# and enriched with margin, stock-turnover and revenue-share indicators.
# The output columns and their order are the same as before.
#
# Behavior notes:
# * stock_turnover_mtd is computed once (CTE `scored`) and reused.
# * When turnover is unknown (no inventory row, or an average stock of zero)
#   stock_velocity_label is NULL; previously such products fell through to the
#   ELSE branch and were labelled as very slow movers.
# * revenue_share_mtd guards its denominator with NULLIF like the other ratios.
spark.sql("""
CREATE OR REPLACE TABLE retail.gold.product_performance_mtd
COMMENT 'Performance produits MTD — couche Gold'
AS
WITH mtd_sales AS (
  SELECT
    product_id,
    product_name,
    category,
    brand,
    SUM(quantity) AS units_sold_mtd,
    SUM(revenue) AS revenue_mtd,
    SUM(gross_profit) AS gross_profit_mtd,
    COUNT(*) AS n_transactions_mtd,
    AVG(unit_price) AS avg_selling_price_mtd,
    AVG(discount_applied_pct) AS avg_discount_mtd
  FROM retail.silver.clean_sales_enriched
  WHERE sale_date >= DATE_TRUNC('month', CURRENT_DATE)
  GROUP BY 1, 2, 3, 4
),
inventory_now AS (
  -- Only rows carrying the single most recent last_updated value of the whole
  -- table are kept.
  -- NOTE(review): products whose latest row is older get no stock here;
  -- confirm last_updated is a snapshot-wide stamp.
  SELECT
    product_id,
    AVG(quantity) AS avg_stock
  FROM retail.raw.inventory
  WHERE last_updated = (SELECT MAX(last_updated) FROM retail.raw.inventory)
  GROUP BY 1
),
scored AS (
  SELECT
    s.*,
    i.avg_stock,
    -- Stock turnover (MTD); NULL when there is no stock figure or it is zero.
    s.units_sold_mtd / NULLIF(i.avg_stock, 0) AS stock_turnover_mtd
  FROM mtd_sales s
  LEFT JOIN inventory_now i ON s.product_id = i.product_id
)
SELECT
  product_id,
  product_name,
  category,
  brand,
  units_sold_mtd,
  revenue_mtd,
  gross_profit_mtd,
  n_transactions_mtd,
  avg_selling_price_mtd,
  avg_discount_mtd,
  avg_stock,

  -- Key retail indicators
  revenue_mtd / NULLIF(units_sold_mtd, 0) AS avg_selling_price_calc,
  gross_profit_mtd / NULLIF(revenue_mtd, 0) AS gross_margin_pct_mtd,

  stock_turnover_mtd,

  -- Product segmentation by turnover; unknown turnover gets no label.
  CASE
    WHEN stock_turnover_mtd IS NULL THEN CAST(NULL AS STRING)
    WHEN stock_turnover_mtd > 4 THEN '🔥 Très rapide'
    WHEN stock_turnover_mtd > 2 THEN '⚡ Rapide'
    WHEN stock_turnover_mtd > 1 THEN '✅ Normal'
    WHEN stock_turnover_mtd > 0.3 THEN '⚠️ Lent'
    ELSE '💀 Très lent'
  END AS stock_velocity_label,

  -- Contribution to total MTD revenue; NULL when the total is zero.
  revenue_mtd / NULLIF(SUM(revenue_mtd) OVER (), 0) AS revenue_share_mtd

FROM scored
""")

print("✅ Table Gold créée : retail.gold.product_performance_mtd")

# Preview the five products with the highest MTD revenue.
spark.sql("""
SELECT product_name, category, units_sold_mtd, gross_margin_pct_mtd, 
       stock_velocity_label, revenue_share_mtd
FROM retail.gold.product_performance_mtd
ORDER BY revenue_mtd DESC
LIMIT 5
""").show(truncate=False)

# 🏆 Gold : `daily_kpi_dashboard`  
## Vue consolidée *daily snapshot* pour dashboard exécutif  
## → Page d'accueil Superset (CEO view)

In [None]:

# Build the gold-layer executive snapshot: a single row of KPIs for the
# reporting day (yesterday) compared with the seven days before it.
#
# Each period is aggregated to exactly one row BEFORE the two are combined.
# The previous version joined the row-level sets with FULL JOIN ... ON 1=1 and
# aggregated afterwards, so every yesterday row was repeated once per
# last-week row (and vice versa): the SUM and COUNT(CASE ...) figures were
# multiplied by the size of the other period.
#
# revenue_vs_last_week_pct is NULL when there is no last-week revenue to
# compare against (previously the baseline silently became 1, so the column
# held yesterday's raw revenue instead of a ratio).
spark.sql("""
CREATE OR REPLACE TABLE retail.gold.daily_kpi_dashboard
COMMENT 'Vue KPI quotidienne — couche Gold (CEO Dashboard)'
AS
WITH reporting_day AS (
  -- The reporting day is yesterday.
  SELECT DATE_SUB(CURRENT_DATE(), 1) AS dt
),
yesterday_agg AS (
  -- Aggregate without GROUP BY: always exactly one row, even with no sales.
  SELECT
    SUM(revenue) AS revenue,
    SUM(gross_profit) AS gross_profit,
    COUNT(DISTINCT sale_id) AS n_transactions,
    COUNT(DISTINCT product_id) AS n_products_sold,
    COUNT(DISTINCT store_id) AS n_stores_active,
    COUNT(DISTINCT salesperson) AS n_salespeople_active,
    COUNT(CASE WHEN margin_status = 'Perte' THEN 1 END) AS n_loss_transactions,
    COUNT(CASE WHEN alert_flag != 'Normal' THEN 1 END) AS n_alerts
  FROM retail.silver.clean_sales_enriched
  WHERE sale_date = (SELECT dt FROM reporting_day)
),
last_week_agg AS (
  -- The seven days preceding the reporting day, as a daily average.
  SELECT SUM(revenue) / 7.0 AS avg_daily_revenue
  FROM retail.silver.clean_sales_enriched
  WHERE sale_date BETWEEN (SELECT DATE_SUB(dt, 7) FROM reporting_day)
                      AND (SELECT DATE_SUB(dt, 1) FROM reporting_day)
)
SELECT
  (SELECT dt FROM reporting_day) AS reporting_date,

  -- Revenue and margin
  COALESCE(y.revenue, 0) AS revenue_yesterday,
  COALESCE(y.gross_profit, 0) AS gross_profit_yesterday,
  COALESCE(y.gross_profit / NULLIF(y.revenue, 0), 0) AS gross_margin_yesterday,

  -- Versus last week (daily average)
  COALESCE(lw.avg_daily_revenue, 0) AS avg_daily_revenue_last_week,
  (COALESCE(y.revenue, 0) - lw.avg_daily_revenue)
    / NULLIF(lw.avg_daily_revenue, 0) AS revenue_vs_last_week_pct,

  -- Activity (COUNT never yields NULL, so no COALESCE is needed)
  y.n_transactions AS n_transactions_yesterday,
  y.n_products_sold AS n_products_sold_yesterday,
  y.n_stores_active AS n_stores_active_yesterday,
  y.n_salespeople_active AS n_salespeople_active_yesterday,

  -- Quality alerts
  y.n_loss_transactions AS n_loss_transactions_yesterday,
  y.n_alerts AS n_alerts_yesterday

FROM yesterday_agg y
CROSS JOIN last_week_agg lw  -- one row x one row = one row
""")

print("✅ Table Gold créée : retail.gold.daily_kpi_dashboard")
spark.sql("SELECT * FROM retail.gold.daily_kpi_dashboard").show(truncate=False)