In [1]:
# Pricing Status Analysis Script
# Converted from SQL query to Python for easier editing and maintenance

# =============================================================================
# STANDARD LIBRARY IMPORTS
# =============================================================================
import os
import warnings
from datetime import datetime, date, timedelta

# =============================================================================
# THIRD-PARTY IMPORTS
# =============================================================================
import numpy as np
import pandas as pd
import snowflake.connector

# =============================================================================
# LOCAL IMPORTS & ENVIRONMENT SETUP
# =============================================================================
import setup_environment_2
import importlib

# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides library deprecation warnings (e.g. from pandas) — consider a narrower filter.
warnings.filterwarnings("ignore")
# Re-execute the module so edits to setup_environment_2 are picked up without restarting the kernel.
importlib.reload(setup_environment_2)
# presumably populates the SNOWFLAKE_* environment variables read by snowflake_query — TODO confirm
setup_environment_2.initialize_env()

print("✓ Environment initialized")

# =============================================================================
# SNOWFLAKE QUERY FUNCTION
# =============================================================================

def snowflake_query(country, query, warehouse=None, columns=None, conn=None):
    """
    Execute a query against Snowflake and return the results as a DataFrame.

    A new connection is opened from the SNOWFLAKE_USERNAME / SNOWFLAKE_ACCOUNT /
    SNOWFLAKE_PASSWORD / SNOWFLAKE_DATABASE environment variables on every call
    and is closed again before the function returns or raises.

    Args:
        country: Country identifier (e.g., "Egypt"). Accepted for call-site
            compatibility; this function does not use it.
        query: SQL query string to execute.
        warehouse: Accepted for call-site compatibility; this function does not
            use it - the session always switches to COMPUTE_WH.
        columns: Optional custom column names. When omitted (or empty) the
            names reported by the cursor are used.
        conn: Accepted for call-site compatibility; this function does not use
            it - a fresh connection is always opened.

    Returns:
        pandas DataFrame with lower-cased column names. An empty result set
        yields an empty DataFrame that still carries the column names.

    Raises:
        KeyError: If one of the SNOWFLAKE_* environment variables is missing.
        Exception: Any error raised while opening the cursor or running the
            query is printed and then re-raised unchanged.
    """
    # `None` instead of a shared mutable `[]` default; an empty sequence still
    # means "use the cursor's column names".
    custom_columns = list(columns) if columns is not None else []

    con = snowflake.connector.connect(
        user     = os.environ["SNOWFLAKE_USERNAME"],
        account  = os.environ["SNOWFLAKE_ACCOUNT"],
        password = os.environ["SNOWFLAKE_PASSWORD"],
        database = os.environ["SNOWFLAKE_DATABASE"]
    )

    # Bound before the try block so the finally clause can tell whether a
    # cursor was ever created; otherwise a failing con.cursor() would be
    # masked by an UnboundLocalError raised from the cleanup code.
    cur = None
    try:
        cur = con.cursor()
        cur.execute("USE WAREHOUSE COMPUTE_WH")
        cur.execute(query)

        names = custom_columns or [col[0] for col in cur.description]
        results = cur.fetchall()

        if not results:
            # Keep the schema even when no rows come back so that callers can
            # still address columns by name.
            out = pd.DataFrame(columns=[name.lower() for name in names])
        else:
            # np.array() on mixed-type rows may coerce values (e.g. numbers to
            # strings); callers re-apply pd.to_numeric on the columns they need.
            out = pd.DataFrame(np.array(results), columns=names)
            out.columns = out.columns.str.lower()

        return out

    except Exception as e:
        print(f"❌ Query error: {e}")
        raise

    finally:
        if cur is not None:
            cur.close()
        con.close()

# Progress marker: printed once the cell that defines snowflake_query has finished running.
print("✓ Snowflake query function loaded")


  warn_incompatible_dep(


/home/ec2-user/.Renviron
/home/ec2-user/service_account_key.json
✓ Environment initialized
✓ Snowflake query function loaded


In [2]:
# Ask Snowflake for the TIMEZONE parameter; the result is kept in the
# notebook-level `zone_to_use`, which fetch_ben_soliman_prices interpolates
# into its CONVERT_TIMEZONE(...) calls.
query = "SHOW PARAMETERS LIKE 'TIMEZONE'"
timezone_result = snowflake_query("Egypt", query)
# First row of the (lower-cased) `value` column holds the timezone name.
zone_to_use = timezone_result['value'].values[0]
print(f"✓ Using timezone: {zone_to_use}")

✓ Using timezone: America/Los_Angeles


In [3]:
# =============================================================================
# PART 1: STATIC DATA - Warehouse Mapping
# =============================================================================

def get_warehouse_mapping():
    """
    Build the static lookup table that ties each warehouse to a region and cohort.

    Returns:
        pandas DataFrame with one row per warehouse and the columns
        'region', 'wh' (warehouse name), 'warehouse_id' and 'cohort_id',
        in the fixed order listed below.
    """
    column_names = ['region', 'wh', 'warehouse_id', 'cohort_id']
    # (region, warehouse name, warehouse_id, cohort_id)
    rows = (
        ('Cairo', 'Mostorod', 1, 700),
        ('Giza', 'Barageel', 236, 701),
        ('Delta West', 'El-Mahala', 337, 703),
        ('Delta West', 'Tanta', 8, 703),
        ('Delta East', 'Mansoura FC', 339, 704),
        ('Delta East', 'Sharqya', 170, 704),
        ('Upper Egypt', 'Assiut FC', 501, 1124),
        ('Upper Egypt', 'Bani sweif', 401, 1126),
        ('Upper Egypt', 'Menya Samalot', 703, 1123),
        ('Upper Egypt', 'Sohag', 632, 1125),
        ('Alexandria', 'Khorshed Alex', 797, 702),
        ('Giza', 'Sakkarah', 962, 701),
    )
    return pd.DataFrame(list(rows), columns=column_names)

# Build the warehouse mapping once and keep it at notebook scope as df_whs.
# NOTE(review): only the header line is printed; the frame itself is never displayed here.
df_whs = get_warehouse_mapping()
print("Warehouse Mapping:")


Warehouse Mapping:


In [4]:
# =============================================================================
# PART 2: FETCH COGS DATA
# =============================================================================

def fetch_current_cogs():
    """
    Load the COGS rows whose validity window contains the current timestamp.

    Returns:
        pandas DataFrame with numeric 'product_id' and 'wac_p' columns taken
        from finance.all_cogs.
    """
    # NOTE(review): CURRENT_TIMESTAMP is compared as-is here, whereas
    # fetch_ben_soliman_prices converts it to 'Africa/Cairo' first — confirm
    # which timezone from_date/to_date are stored in.
    query = """
    SELECT product_id, wac_p
    FROM finance.all_cogs
    WHERE CURRENT_TIMESTAMP BETWEEN from_date AND to_date
    """
    cogs = snowflake_query("Egypt", query)
    # snowflake_query may hand values back as strings; cast the two columns.
    for numeric_col in ('product_id', 'wac_p'):
        cogs[numeric_col] = pd.to_numeric(cogs[numeric_col])
    return cogs

# Run the COGS fetch; the result stays at notebook scope as df_cogs and its row count is reported.
df_cogs = fetch_current_cogs()
print(f"COGS records: {len(df_cogs)}")


COGS records: 8102


In [None]:
# =============================================================================
# PART 3: FETCH RUNNING RATES DATA
# =============================================================================

def fetch_running_rates():
    """
    Compute a one-day run-rate forecast per product x warehouse.

    Runs a single Snowflake query whose CTEs, in order:
      1. aggregate daily sold units, average selling price and retailer counts
         from product_sales_order over the last 3 months;
      2. reduce stock snapshots to daily closing stock, out-of-stock hours and
         an in-stock flag;
      3. join sales, stock and COGS, keeping only days flagged as in stock;
      4. cap units at the per product/warehouse 95th percentile and scale down
         days whose retailer count spikes above the median;
      5. weight each day by recency, stock availability, weekend and margin;
      6. take the weighted average of units, rounded up, as the run rate
         (forced to 0 or to at least 1 by the last-4-days / first-sale rules);
      7. drop product/warehouse pairs matched by the zero_sales_excluded CTE.

    Returns:
        pandas DataFrame with columns 'product_id', 'warehouse_id' and 'rr',
        ordered by warehouse then product. Columns that parse as numbers are
        converted to numeric dtypes; any other column is left as returned.
    """
    # NOTE(review): params.recency_half_life_days and params.zero_rule_days are
    # declared but never referenced; the CTEs hard-code 21 and 4 instead.
    # NOTE(review): base_data starts from sales rows, so days without sales
    # presumably never reach last_4_days — confirm last4_all_instock_zero can fire.
    # NOTE(review): the final SELECT joins sku_wh_stats but selects nothing from it.
    query = """
--------------------------------------------------------------------------------
-- Snowflake: SKU × Warehouse daily forecast (1-day forecast)
-- Enhanced: excludes zero-sales last 4 days (with stock) SKUs
-- (fixed aliasing / casting error in zero_sales_excluded)
--------------------------------------------------------------------------------
WITH params AS (
  SELECT
    CURRENT_DATE() AS run_date,
    DATEADD(month, -3, CURRENT_DATE()) AS history_start,
    21 AS recency_half_life_days,
    4  AS zero_rule_days
),

/* 1) Daily sales aggregation */
sales_base AS (
  SELECT
    pso.product_id            AS PRODUCT_ID,
    pso.warehouse_id,
    CAST(DATE_TRUNC('day', pso.created_at) AS DATE) AS date,
    SUM(pso.purchased_item_count * pso.basic_unit_count) AS sold_units,
    SUM(pso.purchased_item_count * pso.basic_unit_count * pso.item_price)
      / NULLIF(SUM(pso.purchased_item_count * pso.basic_unit_count),0) AS avg_selling_price,
    COUNT(DISTINCT so.retailer_id) AS retailer_count
  FROM product_sales_order pso
  JOIN sales_orders so ON pso.sales_order_id = so.id
  WHERE CAST(DATE_TRUNC('day', pso.created_at) AS DATE) >= (SELECT history_start FROM params)
  GROUP BY 1,2,3
),

/* 2) Stock snapshots -> daily metrics */
stock_snapshots_hourly AS (
  SELECT
    ss.product_id AS product_id,
    ss.warehouse_id,
    CAST(DATE_TRUNC('day', ss.TIMESTAMP) AS DATE) AS date,
    ss.available_stock,
    ss.activation,
    ss.TIMESTAMP AS snapshot_time
  FROM materialized_views.STOCK_SNAP_SHOTS_RECENT ss
  WHERE ss.product_id IS NOT NULL
),

stock_daily AS (
  SELECT
    product_id,
    warehouse_id,
    date,
    MAX_BY(available_stock, snapshot_time) AS stock_closing,
    24 * (
      SUM(CASE WHEN activation = FALSE OR available_stock = 0 THEN 1 ELSE 0 END)::FLOAT
      / NULLIF(COUNT(*),0)
    ) AS oos_hours,
    CASE WHEN MAX(CASE WHEN activation = TRUE AND available_stock > 0 THEN 1 ELSE 0 END) = 1 THEN 1 ELSE 0 END AS in_stock_flag
  FROM stock_snapshots_hourly
  GROUP BY product_id, warehouse_id, date
),

/* 3) Join sales + stock + WAC */
base_data AS (
  SELECT
    sb.product_id,
    sb.warehouse_id,
    sb.date,
    sb.sold_units,
    sb.avg_selling_price,
    sb.retailer_count,
    sd.stock_closing,
    sd.oos_hours,
    sd.in_stock_flag,
    ac.wac_p AS wac,
    CASE WHEN DAYOFWEEKISO(sb.date) IN (5,6) THEN 1 ELSE 0 END AS is_weekend
  FROM sales_base sb
  LEFT JOIN stock_daily sd
    ON sb.product_id = sd.product_id
   AND sb.warehouse_id = sd.warehouse_id
   AND sb.date = sd.date
  LEFT JOIN finance.ALL_COGS ac
    ON sb.product_id = ac.product_id
   AND sb.date BETWEEN ac.from_date AND ac.to_date
  WHERE sd.in_stock_flag = 1
),

/* 4) Stats per SKU × WH */
sku_wh_stats AS (
  SELECT
    product_id,
    warehouse_id,
    AVG(sold_units) AS avg_units,
    STDDEV_SAMP(sold_units) AS SIGMA_D,
    PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY sold_units) AS med_units,
    PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY sold_units) AS pct95_units,
    PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY retailer_count) AS med_retailers,
    PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY CASE 
       WHEN avg_selling_price IS NULL OR avg_selling_price = 0 THEN 0 
       ELSE (avg_selling_price - COALESCE(wac,0))/NULLIF(avg_selling_price,0) END) AS med_margin
  FROM base_data
  GROUP BY product_id, warehouse_id
),


/* 5) Remove outliers */
cleaned AS (
  SELECT
    b.*,
    s.med_units,
    s.pct95_units,
    s.med_retailers,
    s.med_margin,
    CASE WHEN b.sold_units > s.pct95_units THEN s.pct95_units ELSE b.sold_units END AS units_capped,
    CASE WHEN b.retailer_count > GREATEST(2, s.med_retailers * 2) THEN 1 ELSE 0 END AS retailer_spike
  FROM base_data b
  LEFT JOIN sku_wh_stats s
    ON b.product_id = s.product_id AND b.warehouse_id = s.warehouse_id
),

/* 6) Scale down retailer spikes */
adjusted AS (
  SELECT
    *,
    CASE
      WHEN retailer_spike = 1 AND retailer_count > 0 AND med_retailers IS NOT NULL
        THEN ROUND(units_capped * (med_retailers::FLOAT / NULLIF(retailer_count::FLOAT,0)),0)
      ELSE units_capped
    END AS units_adjusted
  FROM cleaned
),

/* 7) Add weighting */
weighted AS (
  SELECT
    a.*,
    DATEDIFF('day', a.date, (SELECT run_date FROM params)) AS days_ago,
    CASE
      WHEN a.date >= DATEADD(day, -21, (SELECT run_date FROM params)) THEN 1.5
      WHEN a.date >= DATEADD(day, -90, (SELECT run_date FROM params)) THEN 1.0
      ELSE 0.5
    END AS w_recency,
    CASE
      WHEN COALESCE(a.in_stock_flag,0) = 1 AND COALESCE(a.oos_hours,0) < 12 THEN 1.4
      WHEN COALESCE(a.in_stock_flag,0) = 1 AND COALESCE(a.oos_hours,0) >= 12 THEN 0.9
      ELSE 0.6
    END AS w_instock,
    CASE WHEN a.is_weekend = 1 THEN 0.7 ELSE 1.0 END AS w_weekend,
    CASE
      WHEN a.avg_selling_price IS NULL OR a.avg_selling_price = 0 THEN 1.0
      WHEN a.med_margin IS NULL THEN 1.0
      ELSE
        CASE
          WHEN ((a.avg_selling_price - COALESCE(a.wac,0)) / NULLIF(a.avg_selling_price,0)) < a.med_margin
            THEN 1.0 + LEAST((a.med_margin - ((a.avg_selling_price - COALESCE(a.wac,0))/NULLIF(a.avg_selling_price,0))) * 2.0, 0.6)
          WHEN ((a.avg_selling_price - COALESCE(a.wac,0)) / NULLIF(a.avg_selling_price,0)) > a.med_margin
            THEN 1.0 - LEAST((((a.avg_selling_price - COALESCE(a.wac,0))/NULLIF(a.avg_selling_price,0)) - a.med_margin) * 2.0, 0.4)
          ELSE 1.0
        END
    END AS w_margin
  FROM adjusted a
),

/* 8) Weighted final rows */
weighted_final AS (
  SELECT
    product_id,
    warehouse_id,
    date,
    units_adjusted,
    w_recency,
    w_instock,
    w_weekend,
    w_margin,
    (w_recency * w_instock * w_weekend * w_margin) AS final_weight,
    in_stock_flag
  FROM weighted
  WHERE units_adjusted IS NOT NULL
    AND CAST(date AS DATE) >= (SELECT history_start FROM params)
),

/* 9) Forecast base */
forecast_base AS (
  SELECT
    product_id,
    warehouse_id,
    SUM(units_adjusted * final_weight) / NULLIF(SUM(final_weight),0) AS weighted_avg_units,
    COUNT(*) AS N_Days_Used
  FROM weighted_final
  GROUP BY product_id, warehouse_id
),

/* 10) Zero-sales last 4 days detection (standard logic) */
last_4_days AS (
  SELECT
    hb.product_id,
    hb.warehouse_id,
    hb.date,
    hb.sold_units,
    hb.in_stock_flag
  FROM base_data hb
  WHERE hb.date >= DATEADD(day, -4, (SELECT run_date FROM params))
    AND hb.date < (SELECT run_date FROM params)
),

last4_flag AS (
  SELECT
    product_id,
    warehouse_id,
    CASE WHEN COUNT(*) = 4
              AND SUM(CASE WHEN COALESCE(sold_units,0) = 0 AND COALESCE(in_stock_flag,0) = 1 THEN 1 ELSE 0 END) = 4
         THEN 1 ELSE 0 END AS last4_all_instock_zero
  FROM last_4_days
  GROUP BY product_id, warehouse_id
),

/* 10.5) Exclude SKUs with stock > 0, zero sales 4 days, and low receipts
   (fixed aliasing, casting and NULL-safe arithmetic) */
zero_sales_excluded AS (
  -- base: sku×warehouse with positive available stock
  SELECT DISTINCT s.warehouse_id, s.product_id AS product_id
  FROM (
    SELECT 
      pw.warehouse_id,
      pw.product_id,
      CAST(SUM(pw.available_stock) AS INT) AS stocks
    FROM product_warehouse pw
    WHERE pw.warehouse_id NOT IN (6,9,10)
      AND pw.is_basic_unit = 1
      AND pw.available_stock > 0
    GROUP BY pw.warehouse_id, pw.product_id
  ) s
  LEFT JOIN (
    SELECT 
      pso.product_id,
      pso.warehouse_id,
      SUM(pso.total_price) AS nmv
    FROM product_sales_order pso
    JOIN sales_orders so ON so.id = pso.sales_order_id
    WHERE so.created_at::date BETWEEN CURRENT_DATE - 5 AND CURRENT_DATE - 1
      AND so.sales_order_status_id NOT IN (7,12)
      AND so.channel IN ('telesales','retailer')
      AND pso.purchased_item_count <> 0
    GROUP BY pso.product_id, pso.warehouse_id
  ) md ON md.product_id = s.product_id AND md.warehouse_id = s.warehouse_id
  LEFT JOIN finance.all_cogs f  
    ON f.product_id = s.product_id
   AND f.from_date::date <= CURRENT_DATE
   AND f.to_date::date > CURRENT_DATE
  LEFT JOIN (
    SELECT pr.warehouse_id, ppr.product_id, SUM(ppr.final_price) AS total_prs
    FROM product_purchased_receipts ppr
    JOIN purchased_receipts pr ON pr.id = ppr.purchased_receipt_id
    JOIN products p ON p.id = ppr.product_id
    WHERE pr.date::date >= CURRENT_DATE - 4
      AND pr.is_actual = 'true'
      AND pr.purchased_receipt_status_id IN (4,5,7)
      AND ppr.purchased_item_count <> 0
    GROUP BY pr.warehouse_id, ppr.product_id
  ) prs_data ON prs_data.product_id = s.product_id AND prs_data.warehouse_id = s.warehouse_id
  WHERE s.stocks > 0
    AND COALESCE(md.nmv,0) = 0
    AND COALESCE(prs_data.total_prs,0) < 0.7 * (COALESCE(f.wac_p,0) * s.stocks)
),

/* 11) First sale detection */
first_sale AS (
  SELECT product_id, warehouse_id, MIN(date) AS first_sale_date
  FROM base_data
  WHERE sold_units > 0
  GROUP BY product_id, warehouse_id
),

/* 12) Final forecast */
final_forecast AS (
  SELECT
    fb.product_id,
    fb.warehouse_id,
    fb.weighted_avg_units,
    fb.N_Days_Used,
    CASE
      WHEN l4.last4_all_instock_zero = 1 THEN 0
      WHEN fs.first_sale_date IS NOT NULL 
           AND fs.first_sale_date >= DATEADD(day, -2, (SELECT run_date FROM params))
         THEN GREATEST(CEIL(fb.weighted_avg_units), 1)
      ELSE CEIL(fb.weighted_avg_units)
    END AS AVG_RUN_RATE
  FROM forecast_base fb
  LEFT JOIN last4_flag l4 ON fb.product_id = l4.product_id AND fb.warehouse_id = l4.warehouse_id
  LEFT JOIN first_sale fs ON fb.product_id = fs.product_id AND fb.warehouse_id = fs.warehouse_id
  LEFT JOIN zero_sales_excluded zse ON fb.product_id = zse.product_id AND fb.warehouse_id = zse.warehouse_id
  WHERE zse.product_id IS NULL
)

SELECT
    ff.product_id,
    ff.warehouse_id,
    ff.AVG_RUN_RATE as rr
FROM final_forecast ff
LEFT JOIN sku_wh_stats s
    ON ff.product_id = s.product_id
   AND ff.warehouse_id = s.warehouse_id
ORDER BY ff.warehouse_id, ff.product_id;
    """
    df = snowflake_query("Egypt", query)
    # Best-effort numeric cast. `errors='ignore'` is deprecated in pandas, so
    # the "leave non-numeric columns alone" behaviour is spelled out instead.
    for col in df.columns:
        try:
            df[col] = pd.to_numeric(df[col])
        except (ValueError, TypeError):
            # Column holds non-numeric values: keep it exactly as returned.
            continue
    return df

# Run the run-rate forecast; the result stays at notebook scope as df_rr.
# NOTE(review): fetch_stocks() reads rr from finance.PREDICTED_RUNNING_RATES, not from df_rr — confirm where df_rr is consumed.
df_rr = fetch_running_rates()
print(f"Running rates records: {len(df_rr)}")


In [None]:
# =============================================================================
# PART 4: FETCH STOCKS DATA
# =============================================================================

def fetch_stocks():
    """
    Fetch available stock per product/warehouse with run rate and days on hand.

    The run rate comes from the latest finance.PREDICTED_RUNNING_RATES row per
    product/warehouse, provided that row is at most 14 days old; otherwise it
    is treated as 0. Warehouses 6, 9 and 10 are excluded and only basic-unit
    rows of product_warehouse are read.

    Returns:
        pandas DataFrame with columns 'warehouse_id', 'product_id', 'stocks',
        'rr' and 'doh'. 'doh' is stocks / rr, or the stock count itself when
        rr is 0. Columns that parse as numbers are converted to numeric
        dtypes; any other column is left as returned.
    """
    query = """
    WITH rr AS (
        SELECT product_id, warehouse_id, rr
        FROM finance.PREDICTED_RUNNING_RATES
        QUALIFY MAX(date) OVER (PARTITION BY product_id, warehouse_id) = date
            AND date::DATE >= CURRENT_DATE - 14
    )
    SELECT 
        pw.warehouse_id,
        pw.product_id,
        pw.available_stock::INTEGER AS stocks,
        COALESCE(rr.rr, 0) AS rr,
        CASE WHEN COALESCE(rr.rr, 0) = 0 THEN pw.available_stock::INTEGER 
             ELSE pw.available_stock::INTEGER / rr.rr 
        END AS doh
    FROM product_warehouse pw
    LEFT JOIN rr ON rr.product_id = pw.product_id AND rr.warehouse_id = pw.warehouse_id
    WHERE pw.warehouse_id NOT IN (6, 9, 10)
        AND pw.is_basic_unit = 1
    """
    df = snowflake_query("Egypt", query)
    # Best-effort numeric cast. `errors='ignore'` is deprecated in pandas, so
    # the "leave non-numeric columns alone" behaviour is spelled out instead.
    for col in df.columns:
        try:
            df[col] = pd.to_numeric(df[col])
        except (ValueError, TypeError):
            # Column holds non-numeric values: keep it exactly as returned.
            continue
    return df

# Run the stock fetch; the result stays at notebook scope as df_stocks and its row count is reported.
df_stocks = fetch_stocks()
print(f"Stock records: {len(df_stocks)}")


In [None]:
# =============================================================================
# PART 5: FETCH SALES DATA
# =============================================================================

def fetch_sales():
    """
    Fetch per product/warehouse sales benchmarks for quantity and retailer counts.

    The query reads orders from the start of the month 8 months back up to
    today (status not in 7/12, channels 'telesales' and 'retailer') and derives:
      * daily metrics: 80th percentile and standard deviation of daily qty and
        retailer count for dates older than 3 days, plus yesterday's and
        today's totals;
      * current month-to-date totals (yesterday only when today is the 1st);
      * for previous months, month-to-date and full-month totals, summarised
        as 80th percentile and the standard deviation of values at or above
        the median.

    Returns:
        pandas DataFrame, one row per warehouse/product/sku/brand/cat, with
        columns 'warehouse_id', 'product_id', 'sku', 'brand', 'cat', 'high_rr',
        'high_rets', 'qty_std', 'rets_std', 'cu_rr', 'today_rr', 'cu_rets',
        'high_mtd_rr', 'high_mtd_rets', 'mtd_qty_std', 'mtd_rets_std',
        'high_full_rr', 'high_full_rets', 'full_qty_std', 'full_rets_std',
        'cu_mtd_rr' and 'cu_mtd_rets', ordered by high_mtd_rr descending.
        Columns that parse as numbers are converted to numeric dtypes; any
        other column (e.g. text) is left as returned.
    """
    # NOTE(review): historical_mtd keeps dates < month_start + day-of-month
    # (days 1..D) while current_mtd keeps dates < CURRENT_DATE (days 1..D-1
    # when D > 1) — confirm the one-day difference is intended.
    query = """
        WITH raw_orders AS (
    SELECT
        so.created_at::DATE AS date,
        pso.warehouse_id,
        pso.product_id,
        CONCAT(p.name_ar, ' ', p.size, ' ', pu.name_ar) AS sku,
        b.name_ar AS brand, 
        c.name_ar AS cat,
        so.retailer_id,
        pso.purchased_item_count * pso.basic_unit_count AS qty
    FROM product_sales_order pso
    JOIN sales_orders so ON so.id = pso.sales_order_id
    JOIN products p ON p.id = pso.product_id
    JOIN brands b ON p.brand_id = b.id 
    JOIN categories c ON p.category_id = c.id
    JOIN product_units pu ON pu.id = p.unit_id
    WHERE so.created_at::DATE BETWEEN date_trunc('month',CURRENT_DATE - interval '8 months') AND CURRENT_DATE
        AND so.sales_order_status_id NOT IN (7, 12)
        AND so.channel IN ('telesales', 'retailer')
        AND pso.purchased_item_count <> 0
),
daily_agg AS (
    -- Aggregate to daily level for daily metrics
    SELECT
        date,
        warehouse_id,
        product_id,
        sku,
        brand,
        cat,
        SUM(qty) AS qty,
        COUNT(DISTINCT retailer_id) AS num_rets
    FROM raw_orders
    GROUP BY 1, 2, 3, 4, 5, 6
),
daily_metrics AS (
    -- Original daily metrics
    SELECT 
        warehouse_id, 
        product_id,
        sku,
        brand,
        cat,
        PERCENTILE_CONT(0.8) WITHIN GROUP (ORDER BY CASE WHEN date < CURRENT_DATE - 3 THEN qty END) AS high_rr,
        PERCENTILE_CONT(0.8) WITHIN GROUP (ORDER BY CASE WHEN date < CURRENT_DATE - 3 THEN num_rets END) AS high_rets,
        COALESCE(STDDEV(CASE WHEN date < CURRENT_DATE - 3 THEN qty END), 0) AS qty_std,
        COALESCE(STDDEV(CASE WHEN date < CURRENT_DATE - 3 THEN num_rets END), 0) AS rets_std,
        COALESCE(SUM(CASE WHEN date = CURRENT_DATE - 1 THEN qty END), 0) AS cu_rr,
        COALESCE(SUM(CASE WHEN date = CURRENT_DATE THEN qty END), 0) AS today_rr,
        COALESCE(SUM(CASE WHEN date = CURRENT_DATE - 1 THEN num_rets END), 0) AS cu_rets
    FROM daily_agg
    GROUP BY 1, 2, 3, 4, 5
),

/* ---------- CURRENT MTD ---------- */
current_mtd AS (
    SELECT
        warehouse_id,
        product_id,
        sku,
        brand,
        cat,
        SUM(qty) AS cu_mtd_rr,
        COUNT(DISTINCT retailer_id) AS cu_mtd_rets
    FROM raw_orders
    WHERE (
        EXTRACT(DAY FROM CURRENT_DATE) = 1
        AND date = CURRENT_DATE - 1
    ) OR (
        EXTRACT(DAY FROM CURRENT_DATE) > 1
        AND date >= DATE_TRUNC('month', CURRENT_DATE)
        AND date < CURRENT_DATE
    )
    GROUP BY 1,2,3,4,5
),

/* ---------- HISTORICAL MTD (aligned day-of-month) ---------- */
historical_mtd AS (
    SELECT
        warehouse_id,
        product_id,
        sku,
        brand,
        cat,
        DATE_TRUNC('month', date) AS month_start,
        SUM(qty) AS mtd_qty,
        COUNT(DISTINCT retailer_id) AS mtd_rets
    FROM raw_orders
    WHERE DATE_TRUNC('month', date) < DATE_TRUNC('month', CURRENT_DATE)
      AND date < DATEADD(
            day,
            EXTRACT(day FROM CURRENT_DATE),
            DATE_TRUNC('month', date)
          )
    GROUP BY 1,2,3,4,5,6
),

historical_full AS (
    SELECT
        warehouse_id,
        product_id,
        sku,
        brand,
        cat,
        DATE_TRUNC('month', date) AS month_start,
        SUM(qty) AS full_qty,
        COUNT(DISTINCT retailer_id) AS full_rets
    FROM raw_orders
    WHERE DATE_TRUNC('month', date) < DATE_TRUNC('month', CURRENT_DATE)
    GROUP BY 1,2,3,4,5,6
),

/* ---------- P80 / MEDIANS ---------- */
mtd_p80_base AS (
    SELECT
        warehouse_id,
        product_id,
        sku,
        brand,
        cat,
        PERCENTILE_CONT(0.8) WITHIN GROUP (ORDER BY mtd_qty)  AS high_mtd_rr,
        PERCENTILE_CONT(0.8) WITHIN GROUP (ORDER BY mtd_rets) AS high_mtd_rets,
        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY mtd_qty)  AS median_mtd_rr,
        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY mtd_rets) AS median_mtd_rets
    FROM historical_mtd
    GROUP BY 1,2,3,4,5
),

full_p80_base AS (
    SELECT
        warehouse_id,
        product_id,
        sku,
        brand,
        cat,
        PERCENTILE_CONT(0.8) WITHIN GROUP (ORDER BY full_qty)  AS high_full_rr,
        PERCENTILE_CONT(0.8) WITHIN GROUP (ORDER BY full_rets) AS high_full_rets,
        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY full_qty)  AS median_full_rr,
        PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY full_rets) AS median_full_rets
    FROM historical_full
    GROUP BY 1,2,3,4,5
),

mtd_p80 AS (
    SELECT
        p.warehouse_id,
        p.product_id,
        p.sku,
        p.brand,
        p.cat,
        p.high_mtd_rr,
        p.high_mtd_rets,
        COALESCE(STDDEV(CASE WHEN h.mtd_qty  >= p.median_mtd_rr  THEN h.mtd_qty  END),0) AS mtd_qty_std,
        COALESCE(STDDEV(CASE WHEN h.mtd_rets >= p.median_mtd_rets THEN h.mtd_rets END),0) AS mtd_rets_std
    FROM mtd_p80_base p
    LEFT JOIN historical_mtd h
        ON p.warehouse_id = h.warehouse_id
       AND p.product_id  = h.product_id
    GROUP BY 1,2,3,4,5,6,7
),

full_p80 AS (
    SELECT
        p.warehouse_id,
        p.product_id,
        p.sku,
        p.brand,
        p.cat,
        p.high_full_rr,
        p.high_full_rets,
        COALESCE(STDDEV(CASE WHEN h.full_qty  >= p.median_full_rr  THEN h.full_qty  END),0) AS full_qty_std,
        COALESCE(STDDEV(CASE WHEN h.full_rets >= p.median_full_rets THEN h.full_rets END),0) AS full_rets_std
    FROM full_p80_base p
    LEFT JOIN historical_full h
        ON p.warehouse_id = h.warehouse_id
       AND p.product_id  = h.product_id
    GROUP BY 1,2,3,4,5,6,7
)

SELECT
    d.*,
    COALESCE(p.high_mtd_rr,0)     AS high_mtd_rr,
    COALESCE(p.high_mtd_rets,0)  AS high_mtd_rets,
    COALESCE(p.mtd_qty_std,0)    AS mtd_qty_std,
    COALESCE(p.mtd_rets_std,0)   AS mtd_rets_std,
    COALESCE(f.high_full_rr,0)   AS high_full_rr,
    COALESCE(f.high_full_rets,0) AS high_full_rets,
    COALESCE(f.full_qty_std,0)   AS full_qty_std,
    COALESCE(f.full_rets_std,0)  AS full_rets_std,
    COALESCE(c.cu_mtd_rr,0)      AS cu_mtd_rr,
    COALESCE(c.cu_mtd_rets,0)    AS cu_mtd_rets
FROM daily_metrics d
LEFT JOIN mtd_p80  p ON d.warehouse_id = p.warehouse_id AND d.product_id = p.product_id
LEFT JOIN full_p80 f ON d.warehouse_id = f.warehouse_id AND d.product_id = f.product_id
LEFT JOIN current_mtd c ON d.warehouse_id = c.warehouse_id AND d.product_id = c.product_id
ORDER BY high_mtd_rr DESC;

    """
    df = snowflake_query("Egypt", query)
    # Best-effort numeric cast. `errors='ignore'` is deprecated in pandas, so
    # the "leave non-numeric columns alone" behaviour is spelled out instead.
    for col in df.columns:
        try:
            df[col] = pd.to_numeric(df[col])
        except (ValueError, TypeError):
            # Text columns (sku / brand / cat) stay exactly as returned.
            continue
    return df

# Run the sales fetch; the result stays at notebook scope as df_sales and its row count is reported.
df_sales = fetch_sales()
print(f"Sales records: {len(df_sales)}")


In [None]:
# =============================================================================
# PART 6: FETCH PRICES DATA
# =============================================================================

def fetch_prices():
    """
    Load the most recent price per product and cohort.

    For every (product, cohort) pair in the hard-coded cohort list, the row of
    cohort_pricing_changes with the latest created_at is kept; only basic-unit
    packing units are considered.

    Returns:
        pandas DataFrame with numeric 'cohort_id', 'product_id' and 'price'.
    """
    cohort_ids = (700, 701, 702, 703, 704, 696, 695, 698, 697, 699, 1123, 1124, 1125, 1126)
    # Rendered as "700, 701, ..." for the IN (...) clause below.
    cohort_str = ', '.join(str(cohort) for cohort in cohort_ids)

    query = f"""
    SELECT cohort_id, product_id, price
    FROM (
        SELECT 
            cpc.cohort_id,
            pu.product_id,
            cpc.price,
            ROW_NUMBER() OVER (PARTITION BY pu.product_id, cpc.cohort_id ORDER BY cpc.created_at DESC) AS rn
        FROM cohort_pricing_changes cpc 
        JOIN PACKING_UNIT_PRODUCTS pu ON pu.id = cpc.product_packing_unit_id
        WHERE cpc.cohort_id IN ({cohort_str})
            AND pu.is_basic_unit = 1 
    )
    WHERE rn = 1
    """
    prices = snowflake_query("Egypt", query)
    for numeric_col in ('cohort_id', 'product_id', 'price'):
        prices[numeric_col] = pd.to_numeric(prices[numeric_col])
    return prices

# Run the price fetch; the result stays at notebook scope as df_prices and its row count is reported.
df_prices = fetch_prices()
print(f"Price records: {len(df_prices)}")


In [None]:
# =============================================================================
# PART 7: FETCH MARKETPLACE PRICES DATA
# =============================================================================

def fetch_marketplace_prices():
    """
    Load marketplace min / mod / max prices per region and product.

    Each price is divided by the packing unit's basic_unit_count and then
    averaged over the rows of a (region, product) pair.

    Returns:
        pandas DataFrame with 'region', numeric 'product_id' and numeric
        'min_price', 'mod_price', 'max_price'.
    """
    query = """
    SELECT 
        mp.region,
        mp.product_id,
        AVG(mp.min_price / pup.basic_unit_count) AS min_price,
        AVG(mp.mod_price / pup.basic_unit_count) AS mod_price,
        AVG(mp.max_price / pup.basic_unit_count) AS max_price
    FROM materialized_views.marketplace_prices mp
    JOIN PACKING_UNIT_PRODUCTS pup ON pup.product_id = mp.product_id AND mp.pu_id = pup.packing_unit_id
    GROUP BY 1, 2
    """
    marketplace = snowflake_query("Egypt", query)
    # 'region' is left as returned; everything else is cast to numeric.
    for numeric_col in ('product_id', 'min_price', 'mod_price', 'max_price'):
        marketplace[numeric_col] = pd.to_numeric(marketplace[numeric_col])
    return marketplace

# Run the marketplace price fetch; the result stays at notebook scope as df_mp and its row count is reported.
df_mp = fetch_marketplace_prices()
print(f"Marketplace price records: {len(df_mp)}")


In [None]:
# =============================================================================
# PART 8: FETCH BEN SOLIMAN PRICES
# =============================================================================

def fetch_ben_soliman_prices():
    """
    Load validated Ben Soliman prices, one averaged value per product.

    Reads the notebook-level ``zone_to_use`` and interpolates it into the
    query so that CURRENT_TIMESTAMP() is converted to 'Africa/Cairo'.

    The query combines two CTEs over materialized_views.savvy_mapping rows
    injected within the last 5 days:
      * ``lower``: rows whose price is more than 30% away from the current
        WAC and where the cohort-700 price is a multiple (p1 > 1) of the
        per-unit price; the price is rescaled by that multiple.
      * ``m_bs``: rows within 30% of the current WAC, reduced to the closest
        match per product and kept only between 0.7x and 1.3x of wac_p.
    For each product only the latest INJECTION_DATE is kept, then averaged.

    Returns:
        pandas DataFrame with numeric 'product_id', 'sku' and numeric
        'ben_soliman_price'.
    """
    query = f"""
 with lower as (
select distinct product_id,sku,new_d*bs_price as ben_soliman_price,INJECTION_DATE
from (
select maxab_product_id as product_id,maxab_sku as sku,INJECTION_DATE,wac1,wac_p,(bs_price/bs_unit_count) as bs_price,diff,cu_price,case when p1 > 1 then child_quantity else 0 end as scheck,round(p1/2)*2 as p1,p2,case when (ROUND(p1 / scheck) * scheck) = 0 then p1 else (ROUND(p1 / scheck) * scheck) end as new_d
from (
select sm.*,wac1, wac_p, abs((bs_price/bs_unit_count)-(wac_p*maxab_basic_unit_count))/(wac_p*maxab_basic_unit_count) as diff,cpc.price as cu_price,pup.child_quantity , round((cu_price/(bs_price/bs_unit_count))) as p1, round(((bs_price/bs_unit_count)/cu_price)) as p2
from materialized_views.savvy_mapping sm 
join finance.all_cogs f on f.product_id = sm.maxab_product_id and CONVERT_TIMEZONE('{zone_to_use}', 'Africa/Cairo', CURRENT_TIMESTAMP()) between f.from_Date and f.to_date
join   PACKING_UNIT_PRODUCTS pu on pu.product_id = sm.maxab_product_id and pu.IS_BASIC_UNIT = 1 
join cohort_product_packing_units cpc on cpc.PRODUCT_PACKING_UNIT_ID = pu.id and cohort_id = 700 
join packing_unit_products pup on pup.product_id = sm.maxab_product_id and pup.is_basic_unit = 1  
where bs_price is not null and INJECTION_DATE::date >= CONVERT_TIMEZONE('{zone_to_use}', 'Africa/Cairo', CURRENT_TIMESTAMP())::date - 5 
and diff > 0.3
and p1 > 1
)
)
qualify max(INJECTION_DATE)over(partition by product_id)  = INJECTION_DATE
),
m_bs as (
select z.* from (
	select maxab_product_id as product_id, maxab_sku as sku, avg(bs_final_price) as ben_soliman_price,INJECTION_DATE
	from (
		select *, row_number() over(partition by maxab_product_id order by diff) as rnk_2 from (
			select *, (bs_final_price-wac_p)/wac_p as diff_2 from (
				select *, bs_price/maxab_basic_unit_count as bs_final_price from (
					select *, row_number() over(partition by maxab_product_id, maxab_pu order by diff) as rnk from (
						select * ,max(INJECTION_DATE::date) over(partition by maxab_product_id, maxab_pu) as max_date,
						from (
							select sm.*,wac1, wac_p, abs(bs_price-(wac_p*maxab_basic_unit_count))/(wac_p*maxab_basic_unit_count) as diff 
					from materialized_views.savvy_mapping sm 
					join finance.all_cogs f on f.product_id = sm.maxab_product_id and CONVERT_TIMEZONE('{zone_to_use}', 'Africa/Cairo', CURRENT_TIMESTAMP()) between f.from_Date and f.to_date
					where bs_price is not null and INJECTION_DATE::date >= CONVERT_TIMEZONE('{zone_to_use}', 'Africa/Cairo', CURRENT_TIMESTAMP())::date - 5 
					and diff < 0.3
					)
					qualify max_date = INJECTION_DATE
					) qualify rnk = 1 
				)
			) where diff_2 between -0.5 and 0.5 
		) qualify rnk_2 = 1 
	) group by all
) z 
join finance.all_cogs f on f.product_id = z.product_id and CONVERT_TIMEZONE('{zone_to_use}', 'Africa/Cairo', CURRENT_TIMESTAMP()) between f.from_Date and f.to_date
where ben_soliman_price between f.wac_p*0.7 and f.wac_p*1.3
)

select product_id,sku,avg(ben_soliman_price) as ben_soliman_price
from (
select *
from (
select * 
from m_bs 

union all

 select *
 from lower
 )
 qualify max(INJECTION_DATE) over(partition by product_id) = INJECTION_DATE
 )
 group by all
    """
    competitor_prices = snowflake_query("Egypt", query)
    # 'sku' is left as returned; the id and price columns are cast to numeric.
    for numeric_col in ('product_id', 'ben_soliman_price'):
        competitor_prices[numeric_col] = pd.to_numeric(competitor_prices[numeric_col])
    return competitor_prices

# Run the Ben Soliman price fetch (requires zone_to_use from the timezone cell); the result stays at notebook scope as df_bsp.
df_bsp = fetch_ben_soliman_prices()
print(f"Ben Soliman price records: {len(df_bsp)}")


In [None]:
# =============================================================================
# PART 9: FETCH SCRAPPED/CLEANED MARKET PRICES
# =============================================================================

def fetch_scrapped_prices():
    """
    Load scraped market prices summarised per product and region.

    Only rows of materialized_views.cleaned_market_prices from the last 5 days
    whose price lies between 0.9x and 1.3x of the product's current wac_p are
    used, and for each (region, product, competitor) only the latest date is
    kept before aggregating.

    Returns:
        pandas DataFrame with numeric 'product_id', 'region' and numeric
        'min_scrapped', 'max_scrapped', 'median_scrapped'.
    """
    query = """
    WITH current_cogs AS (
        SELECT product_id, wac_p
        FROM finance.all_cogs
        WHERE CURRENT_TIMESTAMP BETWEEN from_date AND to_date
    )
    SELECT 
        product_id,
        region,
        MIN(market_price) AS min_scrapped,
        MAX(market_price) AS max_scrapped,
        MEDIAN(market_price) AS median_scrapped
    FROM (
        SELECT 
            cmp.product_id,
            cmp.region,
            cmp.market_price
        FROM materialized_views.cleaned_market_prices cmp
        JOIN current_cogs f ON f.product_id = cmp.product_id
        WHERE cmp.date >= CURRENT_DATE - 5
            AND cmp.market_price BETWEEN f.wac_p * 0.9 AND f.wac_p * 1.3
        QUALIFY MAX(cmp.date) OVER (PARTITION BY cmp.region, cmp.product_id, cmp.competitor) = cmp.date
    )
    GROUP BY 1, 2
    """
    scrapped = snowflake_query("Egypt", query)
    # 'region' is left as returned; the id and price columns are cast to numeric.
    for numeric_col in ('product_id', 'min_scrapped', 'max_scrapped', 'median_scrapped'):
        scrapped[numeric_col] = pd.to_numeric(scrapped[numeric_col])
    return scrapped

# Run the scraped price fetch; the result stays at notebook scope as df_scrapped and its row count is reported.
df_scrapped = fetch_scrapped_prices()
print(f"Scrapped price records: {len(df_scrapped)}")


In [None]:
# =============================================================================
# PART 10: FETCH TARGETS DATA
# =============================================================================

def fetch_cat_brand_targets():
    """
    Load category/brand targets from performance.commercial_targets.

    Rows with a NULL cat or brand are skipped and only dates from the start
    of the current month onwards are read; NMV is summed and margin averaged
    per (cat, brand, month).

    Returns:
        pandas DataFrame with 'cat', 'brand', numeric 'target_nmv', numeric
        'target_bm' and 'month_date'.
    """
    query = """
    SELECT 
        cat, 
        brand, 
        SUM(nmv) AS target_nmv, 
        AVG(margin) AS target_bm,
        DATE_TRUNC('month', DATE) AS month_date
    FROM performance.commercial_targets
    WHERE cat IS NOT NULL AND brand IS NOT NULL 
        AND date >= DATE_TRUNC('month', CURRENT_DATE)
    GROUP BY ALL
    """
    targets = snowflake_query("Egypt", query)
    # Only the two target measures are cast; the other columns are left as returned.
    for numeric_col in ('target_nmv', 'target_bm'):
        targets[numeric_col] = pd.to_numeric(targets[numeric_col])
    return targets

def fetch_cat_targets(df_cat_brand_targets):
    """
    Calculate category-level target margins from the category/brand targets.

    A category's margin is the NMV-weighted average of its rows:
    sum(target_bm * target_nmv) / sum(target_nmv). Categories whose summed
    target_nmv is not positive get a margin of 0.

    Args:
        df_cat_brand_targets: DataFrame with at least the columns 'cat',
            'target_nmv' and 'target_bm'. It is not modified.

    Returns:
        DataFrame with columns ['cat', 'cat_target_margin'], one row per
        category (sorted by 'cat'). An empty input yields an empty frame with
        the same two columns.
    """
    df = df_cat_brand_targets[['cat', 'target_nmv', 'target_bm']].copy()
    df['weighted_margin'] = df['target_bm'] * df['target_nmv']

    # A grouped sum replaces groupby.apply: it is vectorised, keeps a stable
    # column layout for empty input, and skips NaN the same way Series.sum does.
    totals = df.groupby('cat', as_index=False)[['weighted_margin', 'target_nmv']].sum()

    # Mask non-positive denominators so the division produces NaN there, then
    # map those categories to the documented fallback of 0.
    positive_nmv = totals['target_nmv'].where(totals['target_nmv'] > 0)
    totals['cat_target_margin'] = (totals['weighted_margin'] / positive_nmv).fillna(0)

    return totals[['cat', 'cat_target_margin']]

# Run: fetch the cat/brand targets first, then derive the category-level
# fallback margins from that same frame.
df_cat_brand_targets = fetch_cat_brand_targets()
df_cat_targets = fetch_cat_targets(df_cat_brand_targets)
print(f"Cat/Brand target records: {len(df_cat_brand_targets)}")


In [None]:
# =============================================================================
# PART 11: FETCH DISCOUNTED SALES DATA
# =============================================================================

def fetch_discounted_sales():
    """
    Fetch yesterday's sales per warehouse/product with a discount breakdown.

    The innermost query aggregates product_sales_order lines of orders created
    yesterday (status not in 7/12, channel telesales or retailer, non-zero
    purchased_item_count), joined to the COGS row valid at order time:
        total_nmv         SUM(total_price)
        bundle_nmv        total_price of lines with a dynamic bundle id
        sku_discount_nmv  total_price of lines with a SKU discount id
        quantity_nmv      total_price of lines with a quantity discount id
    The outer levels derive:
        b_wac         = cogs / min_qty
        b_margin      = (total_nmv - (cogs + total_discount)) / total_nmv
        blended_price = b_wac / (1 - b_margin)

    Every divisor is wrapped in NULLIF(..., 0) so a product whose denominator
    is zero gets a NULL blended_price instead of failing the whole query with
    a division-by-zero error.

    Returns:
        DataFrame with warehouse_id, product_id, total_nmv, bundle_nmv,
        sku_discount_nmv, quantity_nmv and blended_price; every column that
        can be parsed as numeric is converted.
    """
    query = """
    SELECT warehouse_id, product_id, total_nmv, bundle_nmv, sku_discount_nmv, quantity_nmv, blended_price
    FROM (
        SELECT
            warehouse_id, product_id, total_nmv, bundle_nmv, sku_discount_nmv, quantity_nmv,
            cogs / NULLIF(min_qty, 0) AS b_wac,
            (total_nmv - (cogs + total_discount)) / NULLIF(total_nmv, 0) AS b_margin,
            b_wac / NULLIF(1 - b_margin, 0) AS blended_price
        FROM (
            SELECT
                pso.warehouse_id,
                pso.product_id,
                SUM(pso.total_price) AS total_nmv,
                SUM(CASE WHEN pso.dynamic_bundle_sales_order_id IS NOT NULL THEN pso.total_price END) AS bundle_nmv,
                SUM(CASE WHEN pso.sku_discount_id IS NOT NULL THEN pso.total_price END) AS sku_discount_nmv,
                SUM(CASE WHEN pso.quantity_discount_id IS NOT NULL THEN pso.total_price END) AS quantity_nmv,
                SUM(f.wac_p * pso.purchased_item_count * pso.basic_unit_count) AS cogs,
                SUM(pso.purchased_item_count * pso.basic_unit_count) AS min_qty,
                SUM((ITEM_QUANTITY_DISCOUNT_VALUE * pso.purchased_item_count) + (ITEM_DISCOUNT_VALUE * pso.purchased_item_count)) AS total_discount
            FROM product_sales_order pso
            JOIN sales_orders so ON so.id = pso.sales_order_id
            JOIN finance.all_cogs f ON f.product_id = pso.product_id AND so.created_at BETWEEN from_date AND to_date
            WHERE so.created_at::DATE = CURRENT_DATE - 1
                AND so.sales_order_status_id NOT IN (7, 12)
                AND so.channel IN ('telesales', 'retailer')
                AND pso.purchased_item_count <> 0
            GROUP BY 1, 2
        )
    )
    """
    df = snowflake_query("Egypt", query)

    # Best-effort numeric coercion. pd.to_numeric(errors='ignore') is
    # deprecated, so the "leave non-numeric columns untouched" behaviour is
    # spelled out explicitly.
    for col in df.columns:
        try:
            df[col] = pd.to_numeric(df[col])
        except (ValueError, TypeError):
            continue
    return df

# Run the discounted-sales fetch; df_discounted is passed to build_product_metrics later on.
df_discounted = fetch_discounted_sales()
print(f"Discounted sales records: {len(df_discounted)}")


In [None]:
# =============================================================================
# PART 13B: FETCH PRODUCT WAREHOUSE ACTIVATION
# =============================================================================

def fetch_product_activation():
    """
    Load the activation flag of each product/warehouse pair.

    For every product/warehouse the query picks the packing unit with the
    highest summed total_price over the last 90 days (orders with status not
    in 7/12, channel telesales or retailer, non-zero purchased_item_count) and
    reads product_warehouse.activation for exactly that packing unit.

    Returns:
        DataFrame with product_id, warehouse_id, top_selling_pu and
        activation; the three id columns are converted to numeric dtypes.
        Pairs without a matching product_warehouse row are absent (inner join).
    """
    activation_sql = """
    WITH top_selling_pu AS (
        -- Get the top selling packing unit per product/warehouse in last 3 months
        SELECT 
            pso.product_id,
            pso.warehouse_id,
            pso.packing_unit_id,
            SUM(pso.total_price) AS total_nmv
        FROM product_sales_order pso
        JOIN sales_orders so ON so.id = pso.sales_order_id
        WHERE so.created_at::DATE >= CURRENT_DATE - 90
            AND so.sales_order_status_id NOT IN (7, 12)
            AND so.channel IN ('telesales', 'retailer')
            AND pso.purchased_item_count <> 0
        GROUP BY 1, 2, 3
        QUALIFY ROW_NUMBER() OVER (
            PARTITION BY pso.product_id, pso.warehouse_id 
            ORDER BY SUM(pso.total_price) DESC
        ) = 1
    )
    SELECT 
        tspu.product_id,
        tspu.warehouse_id,
        tspu.packing_unit_id AS top_selling_pu,
        pw.activation AS activation
    FROM top_selling_pu tspu
    JOIN product_warehouse pw 
        ON pw.product_id = tspu.product_id 
        AND pw.warehouse_id = tspu.warehouse_id
        AND pw.packing_unit_id = tspu.packing_unit_id
    """
    activation = snowflake_query("Egypt", activation_sql)
    for id_col in ('product_id', 'warehouse_id', 'top_selling_pu'):
        activation[id_col] = pd.to_numeric(activation[id_col])
    return activation

# Run the activation fetch; df_activation is passed to build_product_metrics later on.
df_activation = fetch_product_activation()
print(f"Product activation records: {len(df_activation)}")


In [None]:
# =============================================================================
# PART 13C: FETCH OOS YESTERDAY DATA
# =============================================================================

def fetch_oos_yesterday():
    """
    Load a per product/warehouse flag for "out of stock the whole day".

    The query reads materialized_views.stock_day_close from two days ago
    onward, treats each row's AVAILABLE_STOCK as the closing stock and the
    previous row's value (LAG over product/warehouse ordered by timestamp) as
    the opening stock, and sets oos_yesterday = 1 only when both are 0.

    Returns:
        DataFrame with product_id, warehouse_id and oos_yesterday, all
        converted to numeric dtypes.
    """
    # NOTE(review): if the date window holds more than two snapshots for a
    # product/warehouse, DISTINCT can return both a 0 and a 1 row for it -
    # confirm how many closes per key the view carries.
    oos_sql = """
    SELECT distinct product_id, warehouse_id,
        CASE WHEN opening_stocks = 0 AND closing_stocks = 0 THEN 1
             ELSE 0 
        END AS oos_yesterday
    FROM (
        SELECT 
            timestamp,
            product_id,
            warehouse_id, 
            AVAILABLE_STOCK AS closing_stocks,
            LAG(AVAILABLE_STOCK) OVER (PARTITION BY product_id, warehouse_id ORDER BY TIMESTAMP) AS opening_stocks
        FROM materialized_views.stock_day_close
        WHERE timestamp::DATE >= CURRENT_DATE - 2
        QUALIFY opening_stocks IS NOT NULL
    )
    """
    oos = snowflake_query("Egypt", oos_sql)
    for numeric_col in ('product_id', 'warehouse_id', 'oos_yesterday'):
        oos[numeric_col] = pd.to_numeric(oos[numeric_col])
    return oos

# Run the OOS fetch; df_oos_yesterday is passed to build_product_metrics later on.
df_oos_yesterday = fetch_oos_yesterday()
print(f"OOS yesterday records: {len(df_oos_yesterday)}")


In [None]:
# =============================================================================
# PART 13D: FETCH PURCHASE ORDER DATA
# =============================================================================

def fetch_po_data():
    """
    Load purchase-order planning data for the last 15 days.

    From retool.PO_INITIAL_PLAN (rows created in the last 15 days) the query
    returns, per product and target warehouse:
      - the most recent row's confirmation_status, creation date
        (last_po_date) and MIN_QUANTITY (ordered_qty);
      - no_last_15: the number of distinct created_at values whose
        confirmation_status differs from 'yes' (0 when there are none).

    Returns:
        DataFrame with product_id, warehouse_id, confirmation_status,
        last_po_date, ordered_qty and no_last_15. The numeric columns are
        converted with pd.to_numeric and last_po_date with pd.to_datetime.
    """
    po_sql = """
    WITH last_data AS (
        SELECT product_id, warehouse_id, confirmation_status, PO_date::DATE AS last_po_date, ordered_qty
        FROM (
            SELECT 
                product_id,
                Target_WAREHOUSE_ID AS warehouse_id,
                confirmation_status,
                created_at AS PO_date,
                MIN_QUANTITY AS ordered_qty,
                reason,
                MAX(PO_date) OVER (PARTITION BY product_id, warehouse_id) AS last_po
            FROM retool.PO_INITIAL_PLAN
            WHERE created_at::DATE >= CURRENT_DATE - 15 
            QUALIFY last_po = PO_date
        )
    ),
    last_15_data AS (
        SELECT 
            product_id,
            target_WAREHOUSE_ID AS warehouse_id,
            COUNT(DISTINCT CASE WHEN confirmation_status <> 'yes' THEN created_at END) AS no_last_15
        FROM retool.PO_INITIAL_PLAN
        WHERE created_at::DATE >= CURRENT_DATE - 15 
        GROUP BY ALL
    )
    SELECT 
        ld.product_id,
        ld.warehouse_id,
        ld.confirmation_status,
        ld.last_po_date,
        ld.ordered_qty,
        COALESCE(lfd.no_last_15, 0) AS no_last_15
    FROM last_data ld 
    LEFT JOIN last_15_data lfd 
        ON lfd.product_id = ld.product_id 
        AND lfd.warehouse_id = ld.warehouse_id
    """
    po_plan = snowflake_query("Egypt", po_sql)
    for numeric_col in ('product_id', 'warehouse_id', 'ordered_qty', 'no_last_15'):
        po_plan[numeric_col] = pd.to_numeric(po_plan[numeric_col])
    po_plan['last_po_date'] = pd.to_datetime(po_plan['last_po_date'])
    return po_plan

# Run the purchase-order fetch; df_po_data is passed to build_product_metrics later on.
df_po_data = fetch_po_data()
print(f"PO data records: {len(df_po_data)}")


In [None]:
# =============================================================================
# PART 12: FETCH COMMERCIAL CONSTRAINTS (MIN PRICES)
# =============================================================================

def fetch_commercial_constraints():
    """
    Load the latest commercial minimum price per product and region.

    Reads non-deleted rows of finance.minimum_prices created inside a window
    that starts at the first day of the current month (or of the previous
    month while the day of month is below 7) and ends six days after the
    first day of next month, then keeps, per product/region, the row(s) with
    the most recent created_at.

    Returns:
        DataFrame with product_id, region and min_price; product_id and
        min_price are converted to numeric dtypes.
    """
    constraints_sql = """
    SELECT product_id, region, min_price
    FROM (
        SELECT 
            product_id, 
            region, 
            min_price,
            created_at,
            MAX(created_at) OVER (PARTITION BY product_id, region) AS max_created
        FROM finance.minimum_prices
        WHERE is_deleted = 'false'
            AND created_at BETWEEN 
                CASE WHEN DATE_PART('day', CURRENT_DATE) < 7 
                     THEN DATE_TRUNC('month', CURRENT_DATE - INTERVAL '1 month') 
                     ELSE DATE_TRUNC('month', CURRENT_DATE)
                END
                AND DATE_TRUNC('month', CURRENT_DATE) + INTERVAL '1 month' + INTERVAL '6 days'
    )
    WHERE created_at = max_created
    """
    constraints = snowflake_query("Egypt", constraints_sql)
    for numeric_col in ('product_id', 'min_price'):
        constraints[numeric_col] = pd.to_numeric(constraints[numeric_col])
    return constraints

# Run the commercial-constraint fetch and report how many rows came back.
df_commercial = fetch_commercial_constraints()
print(f"Commercial constraint records: {len(df_commercial)}")


In [None]:
# =============================================================================
# PART 13: FETCH TARGETS DATA (COMPLEX - WAREHOUSE SKU TARGETS)
# =============================================================================

def fetch_targets_data(df_whs):
    """
    Fetch warehouse-level SKU targets derived from the commercial plan.

    The query spreads each region/cat/brand month-to-date target
    (performance.commercial_targets, first of the month through yesterday)
    down to SKU and warehouse level using sales shares computed from the first
    day of the month three months back through yesterday:
        region_sku_target = target * sku_cntrb
        wh_sku_target     = region_sku_target * wh_cntrb_in_region
        sales             = current-month NMV of the product in the warehouse
        rem_nmv           = wh_sku_target - sales

    Both share divisions are guarded with NULLIF(..., 0), so a zero
    denominator yields a NULL target rather than a division-by-zero failure
    of the whole query.

    Args:
        df_whs: Warehouse mapping frame. Currently unused: the warehouse list
            is embedded in the SQL as a VALUES clause. The parameter is kept
            so existing callers keep working.

    Returns:
        DataFrame with region, warehouse_id, product_id, cat, brand,
        region_sku_target, wh_sku_target, sales and rem_nmv; every column
        that can be parsed as numeric is converted.
    """
    # Plain string on purpose: the SQL has no Python placeholders, so an
    # f-string would only add a risk of accidental brace interpolation.
    query = """
    WITH whs AS (
        SELECT *
        FROM (VALUES
            ('Cairo', 'Mostorod', 1, 700),
            ('Giza', 'Barageel', 236, 701),
            ('Delta West', 'El-Mahala', 337, 703),
            ('Delta West', 'Tanta', 8, 703),
            ('Delta East', 'Mansoura FC', 339, 704),
            ('Delta East', 'Sharqya', 170, 704),
            ('Upper Egypt', 'Assiut FC', 501, 1124),
            ('Upper Egypt', 'Bani sweif', 401, 1126),
            ('Upper Egypt', 'Menya Samalot', 703, 1123),
            ('Upper Egypt', 'Sohag', 632, 1125),
            ('Alexandria', 'Khorshed Alex', 797, 702),
            ('Giza', 'Sakkarah', 962, 701)
        ) x(region, wh, warehouse_id, cohort_id)
    ),
    base_sales AS (
        SELECT
            CASE WHEN whs.region LIKE '%Delta%' THEN 'Delta'
                 WHEN whs.region = 'Cairo' OR whs.region = 'Giza' THEN 'Greater Cairo'
                 ELSE whs.region
            END AS region,
            pso.warehouse_id,
            pso.product_id,
            c.name_ar AS cat,
            b.name_ar AS brand,
            SUM(pso.total_price) AS nmv,
            so.created_at::DATE AS sale_date
        FROM product_sales_order pso
        JOIN sales_orders so ON so.id = pso.sales_order_id
        JOIN products p ON p.id = pso.product_id
        JOIN categories c ON c.id = p.category_id
        JOIN brands b ON b.id = p.brand_id
        JOIN whs ON whs.warehouse_id = pso.warehouse_id
        WHERE so.sales_order_status_id NOT IN (7, 12)
            AND pso.purchased_item_count <> 0
            AND so.channel IN ('retailer', 'telesales')
            AND so.created_at::DATE BETWEEN DATE_TRUNC('month', CURRENT_DATE - INTERVAL '3 month') AND CURRENT_DATE - 1
        GROUP BY 1, 2, 3, 4, 5, 7
    ),
    region_product_nmv AS (
        SELECT region, product_id, cat, brand, SUM(nmv) AS region_product_nmv
        FROM base_sales
        GROUP BY 1, 2, 3, 4
    ),
    warehouse_contribution AS (
        SELECT
            bs.region,
            bs.warehouse_id,
            bs.product_id,
            bs.cat,
            bs.brand,
            SUM(bs.nmv) AS warehouse_nmv,
            SUM(bs.nmv) / NULLIF(rpn.region_product_nmv, 0) AS wh_cntrb_in_region
        FROM base_sales bs
        JOIN region_product_nmv rpn ON rpn.region = bs.region
            AND rpn.product_id = bs.product_id
        GROUP BY 1, 2, 3, 4, 5, rpn.region_product_nmv
    ),
    region_sku_cntrb AS (
        SELECT region, product_id, cat, brand,
            SUM(region_product_nmv)
                / NULLIF(SUM(SUM(region_product_nmv)) OVER (PARTITION BY region, cat, brand), 0) AS sku_cntrb
        FROM region_product_nmv
        GROUP BY 1, 2, 3, 4
    ),
    comm_plan AS (
        SELECT
            CASE WHEN city = 'Alex' THEN 'Alexandria' ELSE city END AS region,
            cat, brand,
            SUM(nmv) AS target
        FROM performance.commercial_targets
        WHERE date BETWEEN DATE_TRUNC('month', CURRENT_DATE) AND CURRENT_DATE - 1
        GROUP BY 1, 2, 3
    ),
    current_month_sales AS (
        SELECT region, warehouse_id, product_id, SUM(nmv) AS nmv
        FROM base_sales
        WHERE sale_date >= DATE_TRUNC('month', CURRENT_DATE)
        GROUP BY 1, 2, 3
    )
    SELECT
        wc.region,
        wc.warehouse_id,
        wc.product_id,
        wc.cat,
        wc.brand,
        cp.target * rsc.sku_cntrb AS region_sku_target,
        cp.target * rsc.sku_cntrb * wc.wh_cntrb_in_region AS wh_sku_target,
        COALESCE(cms.nmv, 0) AS sales,
        cp.target * rsc.sku_cntrb * wc.wh_cntrb_in_region - COALESCE(cms.nmv, 0) AS rem_nmv
    FROM warehouse_contribution wc
    JOIN region_sku_cntrb rsc ON rsc.region = wc.region
        AND rsc.product_id = wc.product_id
    JOIN comm_plan cp ON cp.region = wc.region
        AND cp.cat = wc.cat
        AND cp.brand = wc.brand
    LEFT JOIN current_month_sales cms ON cms.product_id = wc.product_id
        AND cms.warehouse_id = wc.warehouse_id
        AND cms.region = wc.region
    """
    df = snowflake_query("Egypt", query)

    # Best-effort numeric coercion. pd.to_numeric(errors='ignore') is
    # deprecated, so text columns (region, cat, brand) are skipped explicitly.
    for col in df.columns:
        try:
            df[col] = pd.to_numeric(df[col])
        except (ValueError, TypeError):
            continue
    return df

# Run the warehouse-level targets fetch; df_whs must already exist from an earlier cell.
df_targets = fetch_targets_data(df_whs)
print(f"Targets data records: {len(df_targets)}")


In [None]:
# =============================================================================
# PART 14: BUILD PRODUCT METRICS (MERGE ALL DATA)
# =============================================================================

def build_product_metrics(df_stocks, df_sales, df_whs, df_prices, df_cogs,
                          df_mp, df_bsp, df_scrapped, df_cat_brand_targets,
                          df_cat_targets, df_discounted, df_activation, df_oos_yesterday, df_po_data):
    """
    Assemble one metrics row per product/warehouse from all fetched sources.

    Mirrors the 'product_metrics' CTE of the original SQL:
      1. Inner joins: stocks x sales (product/warehouse), warehouse mapping,
         prices (product/cohort) and COGS (product).
      2. Derived columns: bm = (price - wac_p) / price and
         in_stock_perc = 1 when stocks > 0 else 0.
      3. Left joins: marketplace prices (renamed to mp_*), Ben Soliman price,
         scrapped prices, cat/brand and category targets, discounted sales,
         activation, OOS-yesterday (missing -> 0) and purchase-order data.
      4. Keep rows with price > 0 and high_rr > 0, then the first row per
         product/warehouse.

    Returns:
        The merged DataFrame, including target_margin (cat/brand target with
        the category target as fallback).
    """
    sku_wh = ['product_id', 'warehouse_id']
    sku_region = ['product_id', 'region']

    # --- mandatory dimensions: a row survives only if every source has it ---
    metrics = (
        df_stocks
        .merge(df_sales, on=sku_wh, how='inner')
        .merge(df_whs, on='warehouse_id', how='inner')
        .merge(df_prices, on=['product_id', 'cohort_id'], how='inner')
        .merge(df_cogs, on='product_id', how='inner')
    )

    # Basic margin; a zero price becomes NaN so the division cannot blow up.
    safe_price = metrics['price'].replace(0, np.nan)
    metrics['bm'] = (metrics['price'] - metrics['wac_p']) / safe_price
    metrics['in_stock_perc'] = (metrics['stocks'] > 0).astype(int)

    # --- optional market references ---
    marketplace = df_mp.rename(columns={
        'min_price': 'mp_min_price',
        'mod_price': 'mp_mod_price',
        'max_price': 'mp_max_price',
    })
    metrics = (
        metrics
        .merge(marketplace, on=sku_region, how='left')
        .merge(df_bsp[['product_id', 'ben_soliman_price']], on='product_id', how='left')
        .merge(df_scrapped, on=sku_region, how='left')
    )

    # --- margin targets: cat/brand first, category as fallback ---
    # NOTE(review): drop_duplicates only removes identical (cat, brand,
    # target_bm) rows; pairs with several distinct margins still fan out here.
    brand_targets = df_cat_brand_targets[['cat', 'brand', 'target_bm']].drop_duplicates()
    metrics = (
        metrics
        .merge(brand_targets, on=['cat', 'brand'], how='left')
        .merge(df_cat_targets, on='cat', how='left')
    )
    metrics['target_margin'] = metrics['target_bm'].fillna(metrics['cat_target_margin'])

    # --- operational signals ---
    metrics = (
        metrics
        .merge(df_discounted, on=['warehouse_id', 'product_id'], how='left')
        .merge(df_activation[sku_wh + ['activation']], on=sku_wh, how='left')
        .merge(df_oos_yesterday[sku_wh + ['oos_yesterday']], on=sku_wh, how='left')
    )
    # No OOS record is read as "was in stock yesterday".
    metrics['oos_yesterday'] = metrics['oos_yesterday'].fillna(0)

    po_columns = sku_wh + ['confirmation_status', 'last_po_date', 'ordered_qty', 'no_last_15']
    metrics = metrics.merge(df_po_data[po_columns], on=sku_wh, how='left')

    # --- final filter and de-duplication ---
    sellable = (metrics['price'] > 0) & (metrics['high_rr'] > 0)
    metrics = metrics[sellable]
    return metrics.drop_duplicates(subset=sku_wh, keep='first')

# Run after fetching all data: every frame passed here must already exist
# from the fetch cells above (and from earlier cells of the notebook).
df_metrics = build_product_metrics(
    df_stocks, df_sales, df_whs, df_prices, df_cogs,
    df_mp, df_bsp, df_scrapped, df_cat_brand_targets,
    df_cat_targets, df_discounted, df_activation, df_oos_yesterday, df_po_data
)
print(f"Product metrics records: {len(df_metrics)}")


In [None]:
# =============================================================================
# PART 15: SCORING AND CLASSIFICATION
# =============================================================================

def calculate_predicted_closing_rr(row, today=None):
    """
    Calculate predicted closing RR using time-weighted blending (Option 3).

    Logic:
    - As the month progresses, trust the MTD average more (more data available)
    - Early in the month, weight the recent daily RR more (MTD average is noisy)

    Formula:
    - month_progress = days_passed / days_in_month
    - blended_rate = (month_progress x mtd_avg) + ((1 - month_progress) x cu_rr)
    - predicted_closing = cu_mtd_rr + (blended_rate x days_remaining)

    Args:
        row: Mapping-like record (dict or pandas Series) read through
            ``row.get``; uses 'cu_rr' and 'cu_mtd_rr'. Missing keys, None and
            NaN are all treated as 0.
        today: Optional date/datetime used as "now" (anything exposing
            .year, .month and .day). Defaults to datetime.now(); mainly
            useful for back-testing and unit tests.

    Returns:
        cu_rr * days_in_month on the first day of the month, 0 when both
        inputs are 0, otherwise the predicted month-end figure rounded to a
        whole number.
    """
    from datetime import datetime
    import calendar

    def _as_number(value):
        # `value or 0` does not catch NaN (NaN is truthy), so missing pandas
        # values used to leak through and turn the whole prediction into NaN.
        return 0 if pd.isna(value) else value

    # Get current date info
    if today is None:
        today = datetime.now()
    days_passed = today.day - 1  # Days completed (excluding today)
    days_in_month = calendar.monthrange(today.year, today.month)[1]
    days_remaining = days_in_month - days_passed  # includes today

    # Get values from row
    cu_rr = _as_number(row.get('cu_rr', 0))
    cu_mtd_rr = _as_number(row.get('cu_mtd_rr', 0))

    # Handle edge cases
    if days_passed == 0:
        # Day 1 of month: no MTD data, use cu_rr as daily estimate
        return cu_rr * days_in_month

    if cu_mtd_rr == 0 and cu_rr == 0:
        # No sales at all
        return 0

    # MTD average daily rate
    mtd_avg = cu_mtd_rr / days_passed

    # Time-weighted blending: month_progress runs from 0.0 (start of month)
    # towards 1.0 (end of month), shifting weight from cu_rr to the MTD average.
    month_progress = days_passed / days_in_month
    blended_daily_rate = (month_progress * mtd_avg) + ((1 - month_progress) * cu_rr)

    # Predicted closing = what we have + projected remaining
    predicted_closing_rr = cu_mtd_rr + (blended_daily_rate * days_remaining)

    return round(predicted_closing_rr, 0)


def add_scoring_classification(df, df_commercial):
    """
    Add scoring and classification columns.
    Replicates the 'scored_classified' and 'final_scored' CTEs.

    Works on a copy of ``df`` and adds, in this order: predicted_closing_rr,
    region_mapped, the commercial constraint columns (commercial_min plus the
    right-hand region as region_comm), sku_discount_perc,
    quantity_discount_perc, offers_perc, blended_margin, combined_min_market,
    combined_max_market, combined_median_market, mp_price_score,
    stock_comment, rr_comment, rets_comment, closing_rr_comment and
    closing_remaining_nmv.

    Args:
        df: Product metrics frame (output of build_product_metrics).
        df_commercial: Frame with product_id, region and min_price.

    Returns:
        The enriched copy of ``df``.
    """
    # Run-rate comment blend: the MTD status weighs 0.7, the current status 0.3.
    MTD_WEIGHT = 0.7
    CU_WEIGHT = 0.3

    # Status to numeric level mapping
    STATUS_LEVELS = {'Low': 1, 'Normal': 2, 'High': 3, 'Very High': 4}

    RETS_LABELS = {
        'Normal': 'Normal rets',
        'Low': 'low rets',
        'High': 'High rets',
        'Very High': 'Very High rets',
    }
    CLOSING_LABELS = {
        'Normal': 'Normal closing',
        'Low': 'Low closing',
        'High': 'High closing',
        'Very High': 'Very High closing',
    }

    def _band(value, center, spread):
        """Place value in a band around center; None when nothing matches (NaN input)."""
        lower = center - 0.5 * spread
        upper = center + 0.5 * spread
        ceiling = center + 1.5 * spread
        if lower <= value <= upper:
            return 'Normal'
        if value < lower:
            return 'Low'
        if upper <= value <= ceiling:
            return 'High'
        if value > ceiling:
            return 'Very High'
        return None

    def _commercial_region(region):
        # Cairo and Giza are mapped to 'Greater Cairo' for the constraint lookup.
        return 'Greater Cairo' if region in ['Cairo', 'Giza'] else region

    def _stock_comment(rec):
        if rec['in_stock_perc'] == 0:
            return 'OOS'
        days_on_hand = rec['doh']
        if days_on_hand > 30:
            return 'Over Stocked'
        if days_on_hand < 4:
            return 'low stock'
        return 'Good stocks'

    def _rr_comment(rec):
        # Current run rate inputs (required columns)
        current = rec['cu_rr']
        baseline = rec['high_rr']
        spread = rec['qty_std']

        # MTD inputs (optional columns, defaulting to 0)
        mtd_current = rec.get('cu_mtd_rr', 0) or 0
        mtd_baseline = rec.get('high_mtd_rr', 0) or 0
        mtd_spread = rec.get('mtd_qty_std', 0) or 0

        # An unclassifiable value (e.g. NaN) counts as 'Normal' on either side.
        mtd_status = _band(mtd_current, mtd_baseline, mtd_spread) or 'Normal'
        cu_status = _band(current, baseline, spread) or 'Normal'

        mtd_level = STATUS_LEVELS[mtd_status]
        cu_level = STATUS_LEVELS[cu_status]
        weighted_score = (mtd_level * MTD_WEIGHT) + (cu_level * CU_WEIGHT)

        # Score range: 1.0 (both Low) to 4.0 (both Very High)
        if weighted_score < 1.5:
            return 'low rr'
        if weighted_score < 2.5:
            return 'Normal rr'
        if weighted_score < 3.5:
            return 'High rr'
        return 'Very High rr'

    def _rets_comment(rec):
        status = _band(rec['cu_rets'], rec['high_rets'], rec['rets_std'])
        return RETS_LABELS.get(status, '')

    def _closing_comment(rec):
        predicted = rec.get('predicted_closing_rr', 0) or 0
        baseline = rec.get('high_full_rr', 0) or 0
        spread = rec.get('full_qty_std', 0) or 0
        if baseline == 0:
            return ''
        return CLOSING_LABELS.get(_band(predicted, baseline, spread), 'Normal closing')

    scored = df.copy()

    # Predicted closing RR (Option 3: Time-Weighted Blending)
    scored['predicted_closing_rr'] = scored.apply(calculate_predicted_closing_rr, axis=1)

    # Commercial constraints are looked up by product and mapped region
    scored['region_mapped'] = scored['region'].apply(_commercial_region)
    commercial_floor = df_commercial.rename(columns={'min_price': 'commercial_min'})
    scored = scored.merge(
        commercial_floor,
        left_on=['product_id', 'region_mapped'],
        right_on=['product_id', 'region'],
        how='left',
        suffixes=('', '_comm')
    )

    # Discount shares of yesterday's NMV (a zero total becomes NaN)
    nonzero_total_nmv = scored['total_nmv'].replace(0, np.nan)
    scored['sku_discount_perc'] = scored['sku_discount_nmv'].fillna(0) / nonzero_total_nmv
    scored['quantity_discount_perc'] = scored['quantity_nmv'].fillna(0) / nonzero_total_nmv

    # Total offers percentage (excluding bundle_nmv)
    scored['offers_perc'] = (
        scored['sku_discount_perc'].fillna(0) + scored['quantity_discount_perc'].fillna(0)
    )

    # Blended margin (margin using net price after all discounts)
    nonzero_blended_price = scored['blended_price'].replace(0, np.nan)
    scored['blended_margin'] = (scored['blended_price'] - scored['wac_p']) / nonzero_blended_price

    # Combined market references across marketplace, Ben Soliman and scrapped prices
    min_sources = ['mp_min_price', 'mp_mod_price', 'ben_soliman_price', 'min_scrapped']
    max_sources = ['mp_max_price', 'ben_soliman_price', 'max_scrapped']
    median_sources = ['mp_mod_price', 'ben_soliman_price', 'median_scrapped']
    scored['combined_min_market'] = scored[min_sources].min(axis=1)
    scored['combined_max_market'] = scored[max_sources].max(axis=1)
    # Combined median = average of the available medians
    scored['combined_median_market'] = scored[median_sources].mean(axis=1, skipna=True)

    # Position of our price inside the marketplace min/max range
    marketplace_range = (scored['mp_max_price'] - scored['mp_min_price']).replace(0, np.nan)
    scored['mp_price_score'] = (scored['price'] - scored['mp_min_price']) / marketplace_range

    # Row-wise comments
    scored['stock_comment'] = scored.apply(_stock_comment, axis=1)
    scored['rr_comment'] = scored.apply(_rr_comment, axis=1)
    scored['rets_comment'] = scored.apply(_rets_comment, axis=1)
    scored['closing_rr_comment'] = scored.apply(_closing_comment, axis=1)

    # Remaining NMV = (high_full_rr - predicted_closing_rr) * price
    rr_gap = scored['high_full_rr'].fillna(0) - scored['predicted_closing_rr'].fillna(0)
    scored['closing_remaining_nmv'] = rr_gap * scored['price'].fillna(0)

    return scored


In [None]:
# =============================================================================
# PART 16: FINAL SCORING - MARKET POSITION & PRICE COMMENTS
# =============================================================================

def add_final_scoring(df):
    """
    Add the final scoring columns to a copy of ``df``.

    Columns added:
        combined_price_score: position of price inside the combined market
            min/max range, falling back to mp_price_score when that range is
            unusable.
        market_position_status: label comparing price with the combined
            market min / median / max (0.5% tolerance bands).
        price_comment: label combining the price score with basic margin
            versus target margin.

    Returns:
        The enriched copy; the input frame is left untouched.
    """
    scored = df.copy()

    def _combined_score(rec):
        market_max = rec['combined_max_market']
        market_min = rec['combined_min_market']
        our_price = rec['price']
        fallback = rec['mp_price_score']

        range_usable = (
            pd.notna(market_max) and market_max > 0
            and pd.notna(market_min) and market_min < 1e9
        )
        if range_usable and market_max != market_min:
            return (our_price - market_min) / (market_max - market_min)
        return fallback

    def _market_position(rec):
        our_price = rec['price']
        market_min = rec['combined_min_market']
        market_median = rec['combined_median_market']
        market_max = rec['combined_max_market']

        # No reference price from any source
        references = (market_median, rec['mp_min_price'],
                      rec['ben_soliman_price'], rec['median_scrapped'])
        if all(pd.isna(ref) for ref in references):
            return 'No Market Data'

        # Discard sentinel-like bounds
        floor = market_min if pd.notna(market_min) and market_min < 1e9 else None
        ceiling = market_max if pd.notna(market_max) and market_max > 0 else None

        if floor is not None:
            if our_price < floor:
                return 'Below Market'
            if our_price <= floor * 1.005:
                return 'At Market Min'

        if pd.notna(market_median):
            if our_price < market_median * 0.995:
                return 'Below Median'
            if our_price <= market_median * 1.005:
                return 'At Median'

        if ceiling is not None:
            if our_price < ceiling * 0.995:
                return 'Above Median'
            if our_price <= ceiling * 1.005:
                return 'At Market Max'
            if our_price > ceiling * 1.005:
                return 'Above Market'

        return 'At Median'

    def _price_comment(rec):
        market_min = rec['combined_min_market']
        market_max = rec['combined_max_market']
        our_price = rec['price']
        margin = rec['bm']
        target = rec['target_margin']
        fallback = rec['mp_price_score']

        if pd.notna(market_max) and pd.notna(market_min) and market_max != market_min:
            score = (our_price - market_min) / (market_max - market_min)
        else:
            score = fallback

        # Without a score only the margin can be judged
        if pd.isna(score):
            if pd.notna(margin) and pd.notna(target) and margin < target:
                return 'below target'
            return 'above target'

        if score >= 0:
            # at or above market minimum
            if margin > target:
                return 'High price'
            if margin < target:
                return 'Credit note'
        else:
            # below market minimum
            if margin < target:
                return 'Low Price'
            if margin > target:
                return 'room to reduce'

        # Margin equal to target, or not comparable
        return 'below target' if margin < target else 'above target'

    scored['combined_price_score'] = scored.apply(_combined_score, axis=1)
    scored['market_position_status'] = scored.apply(_market_position, axis=1)
    scored['price_comment'] = scored.apply(_price_comment, axis=1)

    return scored


In [None]:
# =============================================================================
# PART 17: ACTION CLASSIFICATION LOGIC
# =============================================================================

def _price_cut_action(commercial_min, price, prefix=''):
    """Choose between cutting the price and lifting the commercial minimum.

    Returns ``'<prefix>Reduce price'`` when there is no commercial minimum or
    it sits more than 1% below the current price; otherwise the minimum itself
    blocks the cut and ``'<prefix>Remove commercial min'`` is returned.
    """
    if pd.isna(commercial_min) or commercial_min < price * 0.99:
        return f'{prefix}Reduce price'
    return f'{prefix}Remove commercial min'


def _offers_or_price_cut_action(offers_perc, commercial_min, price, prefix=''):
    """Recommend offers first (offers share below 0.1), otherwise a price cut."""
    if offers_perc < 0.1:
        return f'{prefix}Offers'
    return _price_cut_action(commercial_min, price, prefix)


def determine_action(row):
    """
    Determine recommended action based on stock, price, and RR status.
    This replicates the complex CASE statement in the final SELECT.

    Args:
        row: One product/warehouse record (a pandas Series or any mapping with
            ``.get``). Required keys: ``stock_comment``, ``price_comment``,
            ``rr_comment``, ``bm``, ``target_margin``, ``cu_rr``, ``today_rr``,
            ``stocks``, ``price``. Optional keys: ``offers_perc`` (default 0),
            ``commercial_min``, ``activation`` (default True),
            ``oos_yesterday`` (default 0), ``blended_price``,
            ``blended_margin``, ``combined_min_market``.

    Returns:
        The action label as a string, or ``None`` when no rule matches.
        Labels use one spelling throughout (``'No action'``,
        ``'Reduce price'``) so that value counts and pivots do not split the
        same action into two categories.
    """
    stock_comment = row['stock_comment']
    price_comment = row['price_comment']
    rr_comment = row['rr_comment']
    commercial_min = row.get('commercial_min')
    bm = row['bm']
    target = row['target_margin']
    cu_rr = row['cu_rr']
    today_rr = row['today_rr']
    stocks = row['stocks']
    activation = row.get('activation', True)
    oos_yesterday = row.get('oos_yesterday', 0)
    price = row['price']
    blended_price = row.get('blended_price')
    blended_margin = row.get('blended_margin')
    combined_min_market = row.get('combined_min_market')

    # Missing offers share (None or NaN) means "no offers". A plain `or 0`
    # lets NaN through, which makes every `< 0.1` / `> 0` test silently False.
    offers_perc = row.get('offers_perc', 0)
    if pd.isna(offers_perc):
        offers_perc = 0

    low_rr = rr_comment == 'low rr'
    # `== False` (not `is False` / `not x`) on purpose: it matches numpy.bool_
    # and 0, while a missing/NaN activation flag is treated as active.
    is_deactivated = activation == False  # noqa: E712

    # OOS - always needs purchase regardless of other conditions
    if stock_comment == 'OOS':
        return 'Purchase'

    # If product was OOS yesterday and has low rr, no action needed
    # (low rr is expected when product was out of stock)
    if low_rr and oos_yesterday == 1 and today_rr > 0:
        return 'No action'

    # High RR / Very High RR products with active offers may be over-discounted.
    # Revisit the offer when the blended price is more than 1% below the market
    # minimum, or - without market data - when the blended margin is below
    # 0.9 x target.
    # NOTE(review): this rule was previously described as "15% below target";
    # the implemented threshold is 10% - confirm the intended value.
    if rr_comment in ['High rr', 'Very High rr'] and pd.notna(blended_price) and offers_perc > 0:
        if pd.notna(combined_min_market):
            if blended_price < combined_min_market * 0.99:
                return 'Revisit the offer'
        elif pd.notna(blended_margin) and pd.notna(target) and blended_margin < (target * 0.9):
            return 'Revisit the offer'

    # Good stocks scenarios
    if stock_comment == 'Good stocks':
        if price_comment in ['Low Price', 'below target']:
            if not low_rr:
                return 'Increase price'
            return 'Offers & Credit Note' if offers_perc < 0.1 else 'Credit Note'
        if price_comment == 'Credit note' and low_rr:
            return 'Credit Note'
        # With market data: price position known - reduce price
        if price_comment in ['High price', 'room to reduce'] and low_rr:
            return _price_cut_action(commercial_min, price)
        # No market data: only margin > target - check offers first
        if price_comment == 'above target' and low_rr:
            return _offers_or_price_cut_action(offers_perc, commercial_min, price)
        if rr_comment == 'Normal rr':
            return 'No action'
        if rr_comment == 'Very High rr' and bm < target:
            return 'Increase price'
        if rr_comment in ['Very High rr', 'High rr'] and bm >= target:
            return 'No action'
        if rr_comment == 'High rr' and bm < target:
            return 'Increase price a bit'

    # Low stock scenarios: every recommendation also includes a purchase
    if stock_comment == 'low stock':
        purchase = 'Purchase & '
        # Margin below target (with or without market data)
        if price_comment in ['Credit note', 'below target'] and low_rr:
            return 'Purchase & Credit Note'
        # With market data: price < min, margin < target
        if price_comment == 'Low Price' and low_rr:
            if offers_perc < 0.1:
                return 'Purchase & Offers & Credit Note'
            return 'Purchase & Credit Note'
        # With market data: price position known - purchase + reduce price
        if price_comment in ['High price', 'room to reduce'] and low_rr:
            return _price_cut_action(commercial_min, price, purchase)
        # No market data: margin > target - check offers first
        if price_comment == 'above target' and low_rr:
            return _offers_or_price_cut_action(offers_perc, commercial_min, price, purchase)
        if rr_comment in ['High rr', 'Normal rr']:
            return 'Purchase'
        if rr_comment == 'Very High rr':
            return 'Purchase & Increase price'

    # Over stocked scenarios
    if stock_comment == 'Over Stocked':
        if price_comment in ['below target', 'Low Price', 'Credit note'] and low_rr:
            return 'Credit Note'

        priced_with_market = price_comment in ['High price', 'room to reduce']
        if (priced_with_market or price_comment == 'above target') and low_rr:
            # Selling (cu_rr > 0), or not selling at all today while still
            # activated: push the price/offers lever.
            if cu_rr > 0 or (today_rr == 0 and not is_deactivated):
                if priced_with_market:
                    return _price_cut_action(commercial_min, price)
                # No market data: margin > target - check offers first
                return _offers_or_price_cut_action(offers_perc, commercial_min, price)
            if today_rr == 0:
                return 'Reactivate'
            # cu_rr <= 0 but today_rr > 0: sales recovering, no action needed
            return 'No action'

        if rr_comment in ['Very High rr', 'High rr', 'Normal rr']:
            if price_comment in ['below target', 'Low Price', 'Credit note']:
                overstock_action = 'Credit Note'
            elif price_comment in ['High price', 'above target']:
                overstock_action = 'Reduce price'
            else:
                overstock_action = None
            if overstock_action is not None:
                # Under 30 days of cover at the current running rate the
                # overstock clears itself (a non-positive rate divides by 1).
                if stocks / (cu_rr if cu_rr > 0 else 1) < 30:
                    return 'No action'
                return overstock_action

    # Edge cases: low rr with no running rate, when nothing above matched
    if low_rr and cu_rr == 0:
        below_target = price_comment in ['below target', 'Low Price']
        if below_target or price_comment == 'above target':
            if today_rr > 0:
                return 'No action'
            if is_deactivated:
                return 'Reactivate'
            if below_target:
                return 'Credit Note'
            return 'Offers' if offers_perc < 0.1 else 'Reduce price'

    return None

def add_actions(df):
    """Return a copy of ``df`` with an ``action`` column from ``determine_action``.

    Team assignment is intentionally not done here; it happens separately,
    once every later modification of the action has been applied.
    """
    with_actions = df.copy()

    # Row-wise classification: one recommended action per record.
    recommended = with_actions.apply(determine_action, axis=1)
    with_actions['action'] = recommended

    return with_actions


def assign_teams(df):
    """
    Flag the teams responsible for each row, based on its final action text.

    Call this AFTER every action modification (including
    add_stock_issue_owner), because the flags are derived from keywords in
    the ``action`` string (case-insensitive):

    - pricing_team:    'price', 'offers' or 'offer'
    - purchase_team:   'purchase'
    - commercial_team: 'credit note', 'commercial min', 'reactivate' or 'supplier'

    A flag is 1 when a keyword matches and None otherwise (including rows
    without an action). Returns a copy; the input frame is left untouched.
    """
    def _keyword_flag(*keywords):
        # Build the per-action predicate for one team's keyword list.
        def _flag(action):
            if not pd.notna(action):
                return None
            text = str(action).lower()
            for keyword in keywords:
                if keyword in text:
                    return 1
            return None
        return _flag

    team_keywords = {
        'pricing_team': ('price', 'offers', 'offer'),
        'purchase_team': ('purchase',),
        'commercial_team': ('credit note', 'commercial min', 'reactivate', 'supplier'),
    }

    tagged = df.copy()
    for team_column, keywords in team_keywords.items():
        tagged[team_column] = tagged['action'].apply(_keyword_flag(*keywords))

    return tagged


def add_stock_issue_owner(df):
    """
    Determine who is responsible for stock issues and rewrite the action.

    Only rows whose ``stock_comment`` is 'OOS' or 'low stock' are affected;
    all other rows keep their action and get no owner.

    Rules, evaluated in this order for affected rows:
    1. No PO in the last 2 days (or never)  -> Purchase team, place an order.
    2. Ordered quantity below the minimum   -> Purchase team, order more.
       Minimum = 3 x (0.85*high_rr if cu_rr == 0, cu_rr if high_rr == 0,
       otherwise min(high_rr, cu_rr)). A missing ordered_qty counts as 0.
    3. SKU is among the largest contributors making up the top 60% of the
       positive NMV gap AND no_last_15 > 0 -> Commercial team (supplier issue).
    4. Otherwise                            -> no owner, action 'No action'.

    Args:
        df: Frame with at least ``product_id``, ``warehouse_id``,
            ``stock_comment``, ``action``, ``high_rr``, ``cu_rr``, ``price``,
            ``ordered_qty`` and ``last_po_date``; ``no_last_15`` is optional.

    Returns:
        A new frame (index reset by the internal merge) with the updated
        ``action`` plus ``days_since_po``, ``nmv_gap`` and
        ``stock_issue_owner`` columns.
    """
    df = df.copy()

    # Minimum quantity that should have been ordered (see rule 2 above)
    def _min_required_qty(row):
        if row['cu_rr'] == 0:
            daily_rate = 0.85 * row['high_rr']
        elif row['high_rr'] == 0:
            daily_rate = row['cu_rr']
        else:
            daily_rate = min(row['high_rr'], row['cu_rr'])
        return 3 * daily_rate

    df['min_required_qty'] = df.apply(_min_required_qty, axis=1)

    # Check if ordered qty is low (missing quantity counts as nothing ordered)
    df['ordered_qty_low'] = df['ordered_qty'].fillna(0) < df['min_required_qty']

    # Calculate days since last PO (None when there has never been one)
    today = pd.Timestamp.now().normalize()
    df['days_since_po'] = df['last_po_date'].apply(
        lambda x: (today - x).days if pd.notna(x) else None
    )

    # Calculate nmv_gap: (high_rr * price) - (cu_rr * price)
    df['nmv_gap'] = (df['high_rr'] * df['price']) - (df['cu_rr'] * df['price'])

    # Only positive gaps (behind target) contribute; missing gaps count as 0
    df['positive_nmv_gap'] = df['nmv_gap'].clip(lower=0).fillna(0)

    # IMPORTANT: Deduplicate BEFORE calculating cumulative contribution
    # to ensure correct gap percentages
    gap_by_sku = df[['product_id', 'warehouse_id', 'positive_nmv_gap']].drop_duplicates(
        subset=['product_id', 'warehouse_id'], keep='first'
    )
    gap_by_sku = gap_by_sku.sort_values('positive_nmv_gap', ascending=False).copy()
    total_positive_gap = gap_by_sku['positive_nmv_gap'].sum()

    if total_positive_gap > 0:
        own_gap = gap_by_sku['positive_nmv_gap']
        # Share of the total gap already covered by larger contributors. A SKU
        # belongs to the top 60% when the 60% mark has not been reached before
        # it - this keeps the SKU that crosses the mark, so a single dominant
        # SKU (> 60% on its own) is flagged rather than silently excluded.
        share_before = (own_gap.cumsum() - own_gap) / total_positive_gap
        gap_by_sku['in_top_60_gap'] = (own_gap > 0) & (share_before < 0.6)
    else:
        gap_by_sku['in_top_60_gap'] = False

    # Merge back the in_top_60_gap flag
    df = df.merge(
        gap_by_sku[['product_id', 'warehouse_id', 'in_top_60_gap']],
        on=['product_id', 'warehouse_id'],
        how='left'
    )

    # Determine stock issue owner and update action (only for OOS and low stock)
    def get_stock_issue_info(row):
        # Rows without a stock issue keep their action and have no owner
        if row['stock_comment'] not in ['OOS', 'low stock']:
            return None, row['action']

        ordered_qty = row.get('ordered_qty')
        days_since_po = row.get('days_since_po')
        no_last_15 = row.get('no_last_15', 0) or 0
        last_po_date = row.get('last_po_date')

        # Format last_po_date for display
        last_po_str = last_po_date.strftime('%Y-%m-%d') if pd.notna(last_po_date) else 'Never'

        # If not ordered in last 2 days -> Purchase team - need to place order
        if pd.isna(days_since_po) or days_since_po > 2:
            return 'Purchase team', f'Purchase (last order: {last_po_str})'

        # If ordered qty is low -> Purchase team - ordered but not enough
        if row.get('ordered_qty_low', False):
            # A PO without a quantity is reported as 0 instead of crashing int()
            ordered_units = 0 if pd.isna(ordered_qty) else int(ordered_qty)
            needed_units = int(row.get('min_required_qty', 0))
            return (
                'Purchase team',
                f'Purchase (ordered qty {ordered_units} is low, need {needed_units})',
            )

        # Ordered recently (guaranteed by the first check), large gap and
        # supplier rejections -> Commercial team
        if row.get('in_top_60_gap', False) and no_last_15 > 0:
            return (
                'Commercial team',
                f'Supplier issue ({int(no_last_15)} rejections) - negotiate with supplier',
            )

        # Ordered recently and enough: nothing further to do
        return None, 'No action'

    # Each element is an (owner, action) tuple
    issue_info = df.apply(get_stock_issue_info, axis=1)
    df['stock_issue_owner'] = issue_info.apply(lambda pair: pair[0])
    new_action = issue_info.apply(lambda pair: pair[1])

    # Update action only for OOS/low stock products
    mask = df['stock_comment'].isin(['OOS', 'low stock'])
    df.loc[mask, 'action'] = new_action[mask]

    # Clean up temporary columns
    df = df.drop(
        columns=['min_required_qty', 'ordered_qty_low', 'positive_nmv_gap', 'in_top_60_gap'],
        errors='ignore'
    )

    return df


In [None]:
# =============================================================================
# PART 18: FINALIZE OUTPUT & ADD CALCULATED COLUMNS
# =============================================================================

def finalize_output(df, df_targets):
    """
    Finalize the output DataFrame with all calculated columns.

    Steps:
    1. Sort by ``high_rr * price`` (descending) and keep one row per
       product/warehouse. De-duplication happens first so that duplicate
       rows cannot inflate the per-warehouse stock totals used in step 2.
    2. Add ``stock_value`` (stocks * price) and ``stock_cntrb`` (share of the
       warehouse's total stock value; 0 when that total is not positive).
    3. Left-join ``wh_sku_target`` and ``rem_nmv`` from ``df_targets``
       (targets are de-duplicated first to avoid row multiplication).
    4. Replace placeholder market bounds with NaN, keep the known output
       columns in their fixed order, and rename ``wh`` -> ``warehouse_name``
       and ``cu_rr`` -> ``current_rr``.

    Args:
        df: Scored frame; must contain ``product_id``, ``warehouse_id``,
            ``stocks``, ``price``, ``high_rr``, ``combined_min_market`` and
            ``combined_max_market``.
        df_targets: Frame with ``warehouse_id``, ``product_id``,
            ``wh_sku_target`` and ``rem_nmv``.

    Returns:
        New DataFrame, one row per product/warehouse, highest
        ``high_rr * price`` first.
    """
    df = df.copy()

    # Sort by high_rr * price descending, then keep the top row per
    # product/warehouse. Done on the private copy, so no chained-assignment
    # warning, and before any per-warehouse aggregation.
    df['_sort_key'] = df['high_rr'] * df['price']
    df = (
        df.sort_values('_sort_key', ascending=False)
        .drop(columns='_sort_key')
        .drop_duplicates(subset=['product_id', 'warehouse_id'], keep='first')
        .copy()
    )

    # Calculate stock value
    df['stock_value'] = df['stocks'] * df['price']

    # Calculate stock contribution per warehouse
    df['stock_cntrb'] = df.groupby('warehouse_id')['stock_value'].transform(
        lambda x: x / x.sum() if x.sum() > 0 else 0
    )

    # Join targets data (deduplicate targets first to avoid row multiplication)
    df_targets_dedup = df_targets[['warehouse_id', 'product_id', 'wh_sku_target', 'rem_nmv']].drop_duplicates(
        subset=['warehouse_id', 'product_id'], keep='first'
    )
    df = df.merge(
        df_targets_dedup,
        on=['warehouse_id', 'product_id'],
        how='left'
    )

    # Clean up market bounds: infinities, the 1e9 value on the minimum and a
    # 0 maximum are replaced with NaN
    df['combined_min_market'] = df['combined_min_market'].replace([np.inf, -np.inf, 1e9], np.nan)
    df['combined_max_market'] = df['combined_max_market'].replace([0, np.inf, -np.inf], np.nan)

    # Select and order final columns
    final_columns = [
        'region', 'wh', 'warehouse_id', 'product_id', 'sku', 'cat', 'brand',
        'stocks', 'doh', 'stock_comment','wac_p', 'price', 'blended_price', 'bm', 'blended_margin', 'target_margin', 'price_comment',
        'mp_min_price', 'mp_mod_price', 'mp_max_price', 'ben_soliman_price',
        'min_scrapped', 'median_scrapped', 'max_scrapped',
        'combined_min_market', 'combined_median_market', 'combined_max_market',
        'mp_price_score', 'combined_price_score', 'market_position_status',
        'high_rr', 'cu_rr', 'today_rr', 'high_mtd_rr', 'cu_mtd_rr', 'predicted_closing_rr', 'high_full_rr', 'closing_rr_comment', 'closing_remaining_nmv', 'rr_comment',
        'high_rets', 'cu_rets', 'rets_comment', 'sku_discount_perc', 'quantity_discount_perc', 'offers_perc', 'commercial_min',
        'action', 'pricing_team', 'purchase_team', 'commercial_team', 'activation', 'oos_yesterday',
        'last_po_date', 'ordered_qty', 'confirmation_status', 'no_last_15', 'days_since_po', 'stock_issue_owner',
        'stock_value', 'stock_cntrb', 'wh_sku_target', 'rem_nmv'
    ]

    # Keep only columns that exist
    existing_cols = [c for c in final_columns if c in df.columns]

    # Rename 'wh' to 'warehouse_name' and 'cu_rr' to 'current_rr' for clarity
    return df[existing_cols].rename(columns={'wh': 'warehouse_name', 'cu_rr': 'current_rr'})


In [None]:
# =============================================================================
# PART 19: MAIN EXECUTION - RUN THE COMPLETE ANALYSIS
# =============================================================================

def run_pricing_status_analysis():
    """
    Run the complete pricing status pipeline and report progress on stdout.

    Takes no arguments: every input frame (df_whs, df_cogs, df_stocks, ...)
    is read from the notebook's global namespace, so the fetch cells above
    must have been executed first.

    Returns:
        DataFrame with all pricing status metrics and recommended actions.
    """
    heavy_rule = "=" * 60
    light_rule = "-" * 60

    print(heavy_rule)
    print("PRICING STATUS ANALYSIS")
    print(heavy_rule)

    # Inventory of the pre-fetched global frames: (name, frame, description)
    print("\nUsing pre-fetched data:")
    fetched_inputs = (
        ("df_whs", df_whs, "warehouses"),
        ("df_cogs", df_cogs, "COGS records"),
        ("df_stocks", df_stocks, "stock records"),
        ("df_sales", df_sales, "sales records"),
        ("df_prices", df_prices, "price records"),
        ("df_mp", df_mp, "marketplace price records"),
        ("df_bsp", df_bsp, "Ben Soliman price records"),
        ("df_scrapped", df_scrapped, "scrapped price records"),
        ("df_cat_brand_targets", df_cat_brand_targets, "category/brand targets"),
        ("df_cat_targets", df_cat_targets, "category targets"),
        ("df_discounted", df_discounted, "discounted sales records"),
        ("df_commercial", df_commercial, "commercial constraint records"),
        ("df_targets", df_targets, "target records"),
        ("df_activation", df_activation, "activation records"),
        ("df_oos_yesterday", df_oos_yesterday, "OOS yesterday records"),
        ("df_po_data", df_po_data, "PO records"),
    )
    for frame_name, frame, description in fetched_inputs:
        print(f"    ✓ {frame_name}: {len(frame)} {description}")

    print("\n" + light_rule)
    print("PROCESSING DATA...")
    print(light_rule)

    # [A] One row per product/warehouse with all raw metrics merged in
    print("\n[A] Building product metrics...")
    metrics = build_product_metrics(
        df_stocks, df_sales, df_whs, df_prices, df_cogs,
        df_mp, df_bsp, df_scrapped, df_cat_brand_targets,
        df_cat_targets, df_discounted, df_activation, df_oos_yesterday, df_po_data
    )
    print(f"    ✓ {len(metrics)} product-warehouse combinations")

    # [B] Scoring and classification
    print("\n[B] Adding scoring and classification...")
    scored = add_scoring_classification(metrics, df_commercial)
    print("    ✓ Scoring added")

    # [C] Market position and price comments
    print("\n[C] Adding final scoring (market position, price comments)...")
    fully_scored = add_final_scoring(scored)
    print("    ✓ Final scoring added")

    # [D] Recommended action per row
    print("\n[D] Determining recommended actions...")
    actioned = add_actions(fully_scored)
    print("    ✓ Actions determined")

    # [E] Stock issue ownership (may rewrite the action)
    print("\n[E] Determining stock issue ownership...")
    owned = add_stock_issue_owner(actioned)
    print("    ✓ Stock issue ownership determined")

    # [F] Team flags, derived from the final action text
    print("\n[F] Assigning teams...")
    teamed = assign_teams(owned)
    print("    ✓ Teams assigned")

    # [G] Output columns, targets, ordering
    print("\n[G] Finalizing output...")
    df_final = finalize_output(teamed, df_targets)
    print(f"    ✓ Final output ready with {len(df_final)} records")

    print("\n" + heavy_rule)
    print("ANALYSIS COMPLETE!")
    print(heavy_rule)

    return df_final

# Run the full analysis and keep the result in `df_result`, which the
# summary, export and aggregate cells below read.
df_result = run_pricing_status_analysis()
# Last expression of the cell: preview the first rows.
df_result.head()


In [None]:
# =============================================================================
# PART 20: UTILITY FUNCTIONS - EXPORT & SUMMARY
# =============================================================================

def export_results(df, filename='pricing_status_output.xlsx'):
    """Write ``df`` to an Excel file (without the index) and return the file name."""
    target_path = filename
    df.to_excel(target_path, index=False)
    print("Results exported to {}".format(target_path))
    return target_path

def get_summary_stats(df):
    """
    Build a summary dictionary from the analysis results.

    Always present: total row count plus unique product / warehouse counts
    (0 when the id column is missing). Added only when the source column
    exists: value-count breakdowns for stock status, actions and market
    position, and the number of flagged rows per team.
    """
    available = df.columns

    def _unique_count(column):
        return df[column].nunique() if column in available else 0

    stats = {
        'Total SKU-Warehouse combinations': len(df),
        'Unique Products': _unique_count('product_id'),
        'Unique Warehouses': _unique_count('warehouse_id'),
    }

    # Entries are added in this fixed order; 'counts' stores a value-count
    # dict, 'flagged' stores the number of non-null flags.
    optional_entries = (
        ('stock_comment', 'Stock Status', 'counts'),
        ('action', 'Actions', 'counts'),
        ('pricing_team', 'Pricing Team Items', 'flagged'),
        ('purchase_team', 'Purchase Team Items', 'flagged'),
        ('commercial_team', 'Commercial Team Items', 'flagged'),
        ('market_position_status', 'Market Position', 'counts'),
    )
    for column, label, mode in optional_entries:
        if column not in available:
            continue
        if mode == 'counts':
            stats[label] = df[column].value_counts().to_dict()
        else:
            stats[label] = df[column].notna().sum()

    return stats

def print_summary(summary):
    """Print the summary: scalar entries on one line, dict entries as an indented list."""
    rule = "=" * 60

    print("\n" + rule)
    print("SUMMARY STATISTICS")
    print(rule)

    for label, stat in summary.items():
        if not isinstance(stat, dict):
            print(f"{label}: {stat}")
            continue
        # Breakdown: heading preceded by a blank line, then one line per category
        print(f"\n{label}:")
        for category, count in stat.items():
            print(f"    {category}: {count}")

    print("\n" + rule)

# Usage: summarise the `df_result` produced by the analysis cell above,
# print the summary, then write the full result to an Excel file.
summary = get_summary_stats(df_result)
print_summary(summary)
export_results(df_result, 'pricing_status_output.xlsx')


# Pricing Status Analysis - Quick Reference

## Data Flow Overview:

1. **Static Data**: Warehouse mappings (region, cohort_id)
2. **COGS**: Current cost of goods (wac_p)
3. **Running Rates**: Predicted running rates from past 14 days
4. **Stocks**: Available stock with DOH calculations
5. **Sales**: 150-day sales history with percentile metrics
6. **Prices**: Latest cohort pricing
7. **Market Prices**: Min/Mod/Max from marketplace, Ben Soliman, and scraped data
8. **Targets**: Category/brand margin targets
9. **Discounts**: Bundle, SKU discount, quantity discount percentages
10. **Commercial Constraints**: Minimum price restrictions

## Key Metrics:

| Metric | Description |
|--------|-------------|
| `doh` | Days on Hand (stocks / running_rate) |
| `bm` | Basic Margin ((price - cost) / price) |
| `high_rr` | 80th percentile of historical running rate |
| `combined_price_score` | Position within market price range (0-1) |

## Action Matrix:

| Stock Status | Price Status | RR Status | Recommended Action |
|--------------|--------------|-----------|-------------------|
| OOS | - | - | Purchase |
| Good stocks | Low/Below target | Low RR | Offers & Credit Note (Credit Note only when `offers_perc` ≥ 0.1) |
| Good stocks | High | Low RR | Reduce price / Remove commercial min |
| Low stock | - | Very High RR | Purchase & Increase price |
| Over Stocked | High | Low RR, cu_rr=0, today_rr=0 | Reactivate if deactivated, otherwise Reduce price / Remove commercial min |

## Configuration:

To customize the analysis, modify:
- `get_warehouse_mapping()` - Add/remove warehouses
- `fetch_prices()` - Modify cohort_ids
- `determine_action()` - Adjust action logic thresholds


In [None]:
# =============================================================================
# RUN ANALYSIS
# =============================================================================
# NOTE: the statements below are live (not commented out). Executing this cell
# runs the whole pipeline again and overwrites the `df_result` and `summary`
# assigned by earlier cells.

# Run the full analysis:
df_result = run_pricing_status_analysis()

# View summary:
summary = get_summary_stats(df_result)
print_summary(summary)

# Preview the data:
df_result.head(20)


In [None]:
# =============================================================================
# EXPORT RESULTS TO EXCEL (Optional)
# =============================================================================
# NOTE: the PART 20 cell already exports to this same file name; running this
# cell writes 'pricing_status_output.xlsx' again from the current `df_result`.

# Export:
export_results(df_result, 'pricing_status_output.xlsx')


In [None]:
# =============================================================================
# AGGREGATE ANALYSIS VIEW
# =============================================================================

def create_aggregate_analysis(df):
    """
    Print and return an aggregate view of the finalized pricing output.

    Sections:
    1. Total target NMV (high_rr * price) vs current NMV (current_rr * price)
    2. Top 15 brands by NMV drop (target NMV - current NMV)
    3. Market position breakdown (SKU counts) for those brands
    4. Action breakdown (SKU counts) for those brands
    5. Action priority summary for those brands; rows without an action are
       kept and shown as 'No Action'

    Brand-level current NMV is the sum of per-row ``current_rr * price``, the
    same way target NMV is built, so ``nmv_drop`` compares like with like.

    Args:
        df: Finalized output with ``brand``, ``product_id``, ``warehouse_id``,
            ``price``, ``high_rr``, ``current_rr``, ``market_position_status``
            and ``action`` columns.

    Returns:
        dict with ``total_metrics`` (dict of floats), ``brand_analysis``
        (all brands, biggest NMV drop first), ``market_status_pivot``,
        ``action_pivot`` and ``action_summary`` DataFrames.
    """
    df_analysis = df.copy()

    # Per-row NMV at the target (high) and at the current running rate
    df_analysis['target_nmv'] = df_analysis['high_rr'] * df_analysis['price']
    df_analysis['current_nmv'] = df_analysis['current_rr'] * df_analysis['price']

    # ==========================================================================
    # 1. TOTAL TARGET NMV SUMMARY
    # ==========================================================================
    total_target_nmv = df_analysis['target_nmv'].sum()
    total_current_nmv = df_analysis['current_nmv'].sum()
    nmv_gap = total_target_nmv - total_current_nmv
    nmv_gap_pct = nmv_gap / total_target_nmv * 100 if total_target_nmv > 0 else 0

    print("=" * 80)
    print("📊 AGGREGATE ANALYSIS - PRICING STATUS")
    print("=" * 80)

    print("\n" + "─" * 80)
    print("💰 TOTAL NMV SUMMARY")
    print("─" * 80)
    print(f"  Target NMV (High RR × Price):    {total_target_nmv:>15,.0f} EGP")
    print(f"  Current NMV (Current RR × Price): {total_current_nmv:>15,.0f} EGP")
    print(f"  NMV Gap:                          {nmv_gap:>15,.0f} EGP ({nmv_gap_pct:.1f}%)")

    # ==========================================================================
    # 2. TOP DROPPING BRANDS ANALYSIS
    # ==========================================================================
    brand_agg = df_analysis.groupby('brand').agg(
        target_nmv=('target_nmv', 'sum'),
        total_high_rr=('high_rr', 'sum'),
        total_current_rr=('current_rr', 'sum'),
        avg_price=('price', 'mean'),
        num_products=('product_id', 'nunique'),
        num_warehouses=('warehouse_id', 'nunique'),
        # Summed per row: (sum of rr) x (mean price) would misstate the NMV
        # whenever prices differ within a brand.
        current_nmv=('current_nmv', 'sum'),
    ).reset_index()

    # Drop metrics; a brand with no high_rr gets NaN instead of dividing by 0
    brand_agg['nmv_drop'] = brand_agg['target_nmv'] - brand_agg['current_nmv']
    brand_agg['rr_drop_pct'] = ((brand_agg['total_high_rr'] - brand_agg['total_current_rr']) /
                                 brand_agg['total_high_rr'].replace(0, np.nan) * 100)

    # Sort by NMV drop (biggest drops first)
    brand_agg_sorted = brand_agg.sort_values('nmv_drop', ascending=False)

    print("\n" + "─" * 80)
    print("📉 TOP 15 DROPPING BRANDS (by NMV Gap)")
    print("─" * 80)

    top_dropping = brand_agg_sorted.head(15)
    print(f"{'Brand':<30} {'Target NMV':>15} {'Current NMV':>15} {'NMV Drop':>15} {'RR Drop%':>10}")
    print("─" * 85)
    for _, row in top_dropping.iterrows():
        print(f"{str(row['brand'])[:30]:<30} {row['target_nmv']:>15,.0f} {row['current_nmv']:>15,.0f} "
              f"{row['nmv_drop']:>15,.0f} {row['rr_drop_pct']:>9.1f}%")

    # ==========================================================================
    # 3. MARKET STATUS BY DROPPING BRANDS
    # ==========================================================================
    # Restrict the remaining sections to the top 15 dropping brands
    top_dropping_brands = top_dropping['brand'].tolist()
    df_top_brands = df_analysis[df_analysis['brand'].isin(top_dropping_brands)]

    # Market status breakdown for top dropping brands
    market_status_by_brand = df_top_brands.groupby(['brand', 'market_position_status']).agg({
        'target_nmv': 'sum',
        'product_id': 'nunique'
    }).reset_index()

    market_status_by_brand.columns = ['brand', 'market_position', 'target_nmv', 'num_skus']

    # Pivot for better view
    market_pivot = market_status_by_brand.pivot_table(
        index='brand',
        columns='market_position',
        values='num_skus',
        fill_value=0
    ).reset_index()

    print("\n" + "─" * 80)
    print("🏪 MARKET POSITION STATUS (Top Dropping Brands - SKU Count)")
    print("─" * 80)
    print(market_pivot.to_string(index=False))

    # ==========================================================================
    # 4. REQUIRED ACTIONS BY BRAND
    # ==========================================================================
    actions_by_brand = df_top_brands.groupby(['brand', 'action']).agg({
        'target_nmv': 'sum',
        'product_id': 'nunique'
    }).reset_index()

    actions_by_brand.columns = ['brand', 'action', 'target_nmv', 'num_skus']

    # Pivot actions
    action_pivot = actions_by_brand.pivot_table(
        index='brand',
        columns='action',
        values='num_skus',
        fill_value=0
    ).reset_index()

    print("\n" + "─" * 80)
    print("⚡ REQUIRED ACTIONS (Top Dropping Brands - SKU Count)")
    print("─" * 80)
    print(action_pivot.to_string(index=False))

    # ==========================================================================
    # 5. ACTION SUMMARY FOR TOP DROPPING BRANDS
    # ==========================================================================
    # dropna=False keeps rows whose action is missing; the default would drop
    # them and make the 'No Action' fallback below unreachable.
    action_summary = df_top_brands.groupby('action', dropna=False).agg(
        target_nmv_at_risk=('target_nmv', 'sum'),
        num_skus=('product_id', 'nunique'),
        num_brands=('brand', 'nunique'),
    ).reset_index()
    action_summary = action_summary.sort_values('target_nmv_at_risk', ascending=False)

    print("\n" + "─" * 80)
    print("📋 ACTION PRIORITY SUMMARY (Top Dropping Brands)")
    print("─" * 80)
    print(f"{'Action':<35} {'Target NMV at Risk':>18} {'# SKUs':>10} {'# Brands':>10}")
    print("─" * 73)
    for _, row in action_summary.iterrows():
        action_name = str(row['action']) if pd.notna(row['action']) else 'No Action'
        print(f"{action_name[:35]:<35} {row['target_nmv_at_risk']:>18,.0f} {row['num_skus']:>10} {row['num_brands']:>10}")

    print("\n" + "=" * 80)

    # Return dataframes for further analysis
    return {
        'total_metrics': {
            'target_nmv': total_target_nmv,
            'current_nmv': total_current_nmv,
            'nmv_gap': nmv_gap,
            'nmv_gap_pct': nmv_gap_pct
        },
        'brand_analysis': brand_agg_sorted,
        'market_status_pivot': market_pivot,
        'action_pivot': action_pivot,
        'action_summary': action_summary
    }

# Run the aggregate analysis on the finalized output; the returned dict is
# kept in `aggregate_results` for the brand cells below.
aggregate_results = create_aggregate_analysis(df_result)


In [None]:
# =============================================================================
# DETAILED BRAND DRILLDOWN VIEW
# =============================================================================

def get_brand_drilldown(df, brand_name):
    """
    Print a per-SKU report for one brand and return the enriched rows.

    The report contains a short summary (SKU / warehouse counts and NMV totals)
    followed by a table of the brand's rows ordered by NMV gap, largest first.

    Args:
        df: DataFrame with at least 'brand', 'high_rr', 'current_rr', 'price',
            'product_id' and 'warehouse_id' columns
        brand_name: Exact value of the 'brand' column to report on

    Returns:
        A copy of the brand's rows with 'target_nmv', 'current_nmv' and
        'nmv_gap' columns added, or None when no row matches `brand_name`.
    """
    brand_rows = df.loc[df['brand'] == brand_name].copy()

    # Guard clause: nothing to report for an unknown brand
    if brand_rows.shape[0] == 0:
        print(f"No data found for brand: {brand_name}")
        return None

    # NMV = run rate * price; the gap is target minus current
    brand_rows = brand_rows.assign(
        target_nmv=lambda rows: rows['high_rr'] * rows['price'],
        current_nmv=lambda rows: rows['current_rr'] * rows['price'],
        nmv_gap=lambda rows: rows['target_nmv'] - rows['current_nmv'],
    )

    # Report header
    print(f"\n{'='*80}")
    print(f"🔍 BRAND DRILLDOWN: {brand_name}")
    print(f"{'='*80}")

    # Brand-level totals
    print(f"\n📊 Summary:")
    print(f"   Total SKUs: {brand_rows['product_id'].nunique()}")
    print(f"   Warehouses: {brand_rows['warehouse_id'].nunique()}")
    print(f"   Target NMV: {brand_rows['target_nmv'].sum():,.0f} EGP")
    print(f"   Current NMV: {brand_rows['current_nmv'].sum():,.0f} EGP")
    print(f"   NMV Gap: {brand_rows['nmv_gap'].sum():,.0f} EGP")

    # Preferred display columns; any that the input frame lacks are silently dropped
    preferred_columns = (
        'warehouse_name', 'sku', 'price', 'high_rr', 'current_rr',
        'stock_comment', 'market_position_status', 'price_comment',
        'rr_comment', 'action', 'nmv_gap',
    )
    available_columns = [name for name in preferred_columns if name in brand_rows.columns]

    sku_table = brand_rows[available_columns].sort_values(by='nmv_gap', ascending=False)

    print(f"\n📋 SKU Details (sorted by NMV Gap):")
    print(sku_table.to_string(index=False))

    return brand_rows

# View top dropping brands DataFrame.
# The last expression is left bare so the notebook renders it as the cell output.
# NOTE(review): the printed label says "Full DataFrame" but only the first 15 rows
# of aggregate_results['brand_analysis'] are shown by .head(15).
print("📈 TOP DROPPING BRANDS (Full DataFrame):")
aggregate_results['brand_analysis'].head(15)


In [None]:
# =============================================================================
# EXAMPLE: DRILLDOWN INTO TOP DROPPING BRAND
# =============================================================================

# Get the top dropping brand name: the 'brand' value of the first row of the
# brand analysis table.
# NOTE(review): presumably 'brand_analysis' is already sorted with the biggest
# drop first — confirm in create_aggregate_analysis. `.iloc[0]` raises IndexError
# if that table is empty.
top_brand = aggregate_results['brand_analysis'].iloc[0]['brand']

# Drilldown into the top dropping brand (prints the report; returns the brand's
# rows with NMV columns added, or None when the brand has no rows in df_result)
brand_detail = get_brand_drilldown(df_result, top_brand)

# Or specify a brand manually:
# brand_detail = get_brand_drilldown(df_result, "Your Brand Name Here")


In [None]:
# =============================================================================
# EXPORT AGGREGATE ANALYSIS TO EXCEL
# =============================================================================

def get_team_sheet(df, team_flag_column, team_name):
    """
    Build the export sheet for one team: the rows flagged for that team,
    enriched with NMV / margin gap columns and ordered by NMV gap (largest first).

    Args:
        df: DataFrame with all SKU data; must contain 'high_rr', 'current_rr',
            'price', 'bm', 'target_margin' and the team flag column
        team_flag_column: Column name for the team flag (e.g., 'pricing_team');
            rows where it equals 1 are kept
        team_name: Name of the team for display (accepted but not read by this function)

    Returns:
        New DataFrame restricted to the export columns that exist in the data,
        in the fixed export order. The input frame is not modified.
    """
    # Derived metrics are computed on the full frame first, then the team filter is applied
    enriched = df.assign(
        target_nmv=lambda rows: rows['high_rr'] * rows['price'],
        current_nmv=lambda rows: rows['current_rr'] * rows['price'],
        nmv_gap=lambda rows: rows['target_nmv'] - rows['current_nmv'],
        margin_gap=lambda rows: rows['bm'] - rows['target_margin'],
    )

    # Keep only rows whose team flag equals 1, biggest NMV gap on top
    is_assigned = enriched[team_flag_column] == 1
    team_rows = enriched.loc[is_assigned].sort_values(by='nmv_gap', ascending=False)

    # Fixed export layout; columns missing from the data are skipped
    export_layout = (
        'region', 'warehouse_name', 'product_id', 'sku', 'cat', 'brand',
        'stocks', 'doh', 'stock_comment', 'wac_p',
        'price', 'blended_price', 'bm', 'blended_margin', 'target_margin', 'margin_gap', 'price_comment',
        'combined_min_market', 'combined_median_market', 'combined_max_market',
        'market_position_status',
        'high_rr', 'current_rr', 'today_rr', 'high_mtd_rr', 'cu_mtd_rr', 'rr_comment',
        'action', 'sku_discount_perc', 'quantity_discount_perc', 'offers_perc', 'commercial_min', 'activation',
        'target_nmv', 'current_nmv', 'nmv_gap',
    )
    present_columns = [name for name in export_layout if name in team_rows.columns]

    return team_rows[present_columns]


def get_team_summary(df, team_flags=None):
    """
    Create a summary showing each team's total assignments and NMV gap responsibility.

    One row is produced per team plus a 'TOTAL (All Teams)' row covering every
    row flagged for at least one team (a row flagged for several teams is counted
    once in the total). The result is sorted by 'Total NMV Gap' descending; the
    TOTAL row takes part in that sort like any other row.

    Args:
        df: DataFrame with 'high_rr', 'current_rr', 'price', 'product_id' and
            one flag column per team (a row belongs to a team when the flag == 1)
        team_flags: Optional mapping (or iterable of pairs) of display name ->
            flag column. Defaults to the Pricing / Purchase / Commercial teams.

    Returns:
        DataFrame with columns 'Team', 'Total SKUs Assigned', 'Unique Products',
        'Total Target NMV', 'Total Current NMV', 'Total NMV Gap', 'NMV Gap %'.
        The input frame is not modified.
    """
    if team_flags is None:
        team_flags = {
            'Pricing Team': 'pricing_team',
            'Purchase Team': 'purchase_team',
            'Commercial Team': 'commercial_team',
        }
    else:
        team_flags = dict(team_flags)

    # Calculate NMV metrics on a copy so the caller's frame stays untouched
    scored = df.copy()
    scored['target_nmv'] = scored['high_rr'] * scored['price']
    scored['current_nmv'] = scored['current_rr'] * scored['price']
    scored['nmv_gap'] = scored['target_nmv'] - scored['current_nmv']

    def _metrics_row(label, rows):
        """Aggregate one subset of rows into a summary record."""
        target_total = rows['target_nmv'].sum()
        gap_total = rows['nmv_gap'].sum()
        return {
            'Team': label,
            'Total SKUs Assigned': len(rows),
            'Unique Products': rows['product_id'].nunique(),
            'Total Target NMV': target_total,
            'Total Current NMV': rows['current_nmv'].sum(),
            'Total NMV Gap': gap_total,
            # Guard against dividing by a zero (or negative) target
            'NMV Gap %': (gap_total / target_total * 100) if target_total > 0 else 0,
        }

    # One record per team, in the order the teams were declared
    teams_data = [
        _metrics_row(label, scored[scored[flag_column] == 1])
        for label, flag_column in team_flags.items()
    ]

    # Total (all teams combined): union of the flags, so multi-team rows count once
    in_any_team = scored[list(team_flags.values())].eq(1).any(axis=1)
    teams_data.append(_metrics_row('TOTAL (All Teams)', scored[in_any_team]))

    # Sort by NMV Gap descending
    return pd.DataFrame(teams_data).sort_values('Total NMV Gap', ascending=False)


def export_aggregate_analysis(df, aggregate_results, filename='pricing_aggregate_analysis.xlsx'):
    """
    Write the aggregate analysis to a multi-sheet Excel workbook and print a
    short per-team recap.

    Sheets, in workbook order:
    1. Team Summary     - one row per team with assignment counts and NMV gap
    2. Pricing Team     - SKUs flagged for pricing (NMV gap descending)
    3. Purchase Team    - SKUs flagged for purchase (NMV gap descending)
    4. Commercial Team  - SKUs flagged for commercial (NMV gap descending)
    5. Brand Analysis   - aggregate_results['brand_analysis']
    6. Market Status    - aggregate_results['market_status_pivot']
    7. Actions by Brand - aggregate_results['action_pivot']
    8. Action Summary   - aggregate_results['action_summary']
    9. Raw Data         - the input rows plus target/current NMV and NMV gap

    Args:
        df: DataFrame with all SKU data (must carry the columns required by
            get_team_sheet and get_team_summary)
        aggregate_results: Dict holding the four aggregate DataFrames listed above
        filename: Path of the .xlsx file to write

    Returns:
        The `filename` that was written.
    """
    # Raw-data sheet: input rows enriched with the NMV columns
    raw_with_nmv = df.assign(
        target_nmv=lambda rows: rows['high_rr'] * rows['price'],
        current_nmv=lambda rows: rows['current_rr'] * rows['price'],
        nmv_gap=lambda rows: rows['target_nmv'] - rows['current_nmv'],
    )

    # Per-team sheets and the team roll-up are built before the file is opened
    pricing_sheet = get_team_sheet(df, 'pricing_team', 'Pricing Team')
    purchase_sheet = get_team_sheet(df, 'purchase_team', 'Purchase Team')
    commercial_sheet = get_team_sheet(df, 'commercial_team', 'Commercial Team')
    summary_sheet = get_team_summary(df)

    # (sheet name, frame) pairs for sheets 1-4
    team_sheets = (
        ('Team Summary', summary_sheet),
        ('Pricing Team', pricing_sheet),
        ('Purchase Team', purchase_sheet),
        ('Commercial Team', commercial_sheet),
    )
    # (aggregate_results key, sheet name) pairs for sheets 5-8; the dict lookup
    # is deferred until the sheet is actually written
    aggregate_sheets = (
        ('brand_analysis', 'Brand Analysis'),
        ('market_status_pivot', 'Market Status'),
        ('action_pivot', 'Actions by Brand'),
        ('action_summary', 'Action Summary'),
    )

    with pd.ExcelWriter(filename, engine='openpyxl') as writer:
        for sheet_name, frame in team_sheets:
            frame.to_excel(writer, sheet_name=sheet_name, index=False)

        for result_key, sheet_name in aggregate_sheets:
            aggregate_results[result_key].to_excel(writer, sheet_name=sheet_name, index=False)

        # Sheet 9: full detail with NMV calculations
        raw_with_nmv.to_excel(writer, sheet_name='Raw Data', index=False)

    print(f"✓ Aggregate analysis exported to: {filename}")
    print(f"\n📊 TEAM ASSIGNMENTS SUMMARY:")
    print(f"  - Pricing Team:    {len(pricing_sheet):>6} SKUs | NMV Gap: {pricing_sheet['nmv_gap'].sum():>15,.0f} EGP")
    print(f"  - Purchase Team:   {len(purchase_sheet):>6} SKUs | NMV Gap: {purchase_sheet['nmv_gap'].sum():>15,.0f} EGP")
    print(f"  - Commercial Team: {len(commercial_sheet):>6} SKUs | NMV Gap: {commercial_sheet['nmv_gap'].sum():>15,.0f} EGP")
    return filename

# Export: write the team sheets, aggregate tables and raw data for `df_result`
# to 'pricing_aggregate_analysis.xlsx'. The function returns the filename, which
# the notebook shows as the cell output because the call is the last expression.
export_aggregate_analysis(df_result, aggregate_results, 'pricing_aggregate_analysis.xlsx')
