In [None]:
# Pricing Status Analysis Script
# Converted from SQL query to Python for easier editing and maintenance

# =============================================================================
# STANDARD LIBRARY IMPORTS
# =============================================================================
import os
import warnings
from datetime import datetime, date, timedelta

# =============================================================================
# THIRD-PARTY IMPORTS
# =============================================================================
import numpy as np
import pandas as pd
import snowflake.connector

# =============================================================================
# LOCAL IMPORTS & ENVIRONMENT SETUP
# =============================================================================
import setup_environment_2
import importlib

# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides library deprecation warnings (e.g. from pandas).
warnings.filterwarnings("ignore")
# Re-import the local setup module so edits to it are picked up without a kernel restart,
# then run its initializer (presumably populates the SNOWFLAKE_* environment variables
# read by snowflake_query — confirm in setup_environment_2).
importlib.reload(setup_environment_2)
setup_environment_2.initialize_env()

print("‚úì Environment initialized")

# =============================================================================
# SNOWFLAKE QUERY FUNCTION
# =============================================================================

def snowflake_query(country, query, warehouse=None, columns=None, conn=None):
    """
    Execute a query against Snowflake and return results as DataFrame.

    Args:
        country: Country identifier (e.g., "Egypt"). Accepted for call-site
            compatibility; it is not used to select credentials or a database.
        query: SQL query string to execute
        warehouse: Snowflake warehouse to activate before running the query.
            Defaults to "COMPUTE_WH" when omitted.
        columns: Custom column names (optional). When given and non-empty they
            replace the names reported by the cursor, for empty results too.
        conn: Existing connection (optional). When supplied it is used as-is
            and left open for the caller; otherwise a new connection is opened
            from the SNOWFLAKE_* environment variables and closed on exit.

    Returns:
        pandas DataFrame with query results; column names are lower-cased.

    Raises:
        Any exception raised while executing the query is printed and re-raised.
    """
    owns_connection = conn is None
    if owns_connection:
        con = snowflake.connector.connect(
            user     = os.environ["SNOWFLAKE_USERNAME"],
            account  = os.environ["SNOWFLAKE_ACCOUNT"],
            password = os.environ["SNOWFLAKE_PASSWORD"],
            database = os.environ["SNOWFLAKE_DATABASE"]
        )
    else:
        con = conn

    # Bound before the try so the finally clause never hits an unbound name
    # when cursor creation itself fails.
    cur = None
    try:
        cur = con.cursor()
        cur.execute(f"USE WAREHOUSE {warehouse or 'COMPUTE_WH'}")
        cur.execute(query)

        # description can be None for statements that return no result set.
        column_names = [col[0] for col in (cur.description or [])]
        results = cur.fetchall()

        if columns is not None and len(columns) > 0:
            labels = list(columns)
        else:
            labels = column_names
        labels = [name.lower() for name in labels]

        if not results:
            return pd.DataFrame(columns=labels)

        # NOTE(review): np.array() coerces mixed-type rows to a common dtype
        # (possibly strings); callers re-cast with pd.to_numeric afterwards.
        return pd.DataFrame(np.array(results), columns=labels)

    except Exception as e:
        print(f"‚ùå Query error: {e}")
        raise

    finally:
        if cur is not None:
            cur.close()
        # Only close connections this function opened itself.
        if owns_connection:
            con.close()

# Progress marker: confirms the cell defining snowflake_query has been executed.
print("‚úì Snowflake query function loaded")


In [None]:
# =============================================================================
# PART 1: STATIC DATA - Warehouse Mapping
# =============================================================================

def get_warehouse_mapping():
    """
    Return the static warehouse lookup table.

    Returns:
        DataFrame with one row per warehouse and the columns
        'region', 'wh' (warehouse name), 'warehouse_id' and 'cohort_id'.
    """
    header = ['region', 'wh', 'warehouse_id', 'cohort_id']
    # (region, warehouse name, warehouse id, pricing cohort id)
    records = (
        ('Cairo', 'Mostorod', 1, 700),
        ('Giza', 'Barageel', 236, 701),
        ('Delta West', 'El-Mahala', 337, 703),
        ('Delta West', 'Tanta', 8, 703),
        ('Delta East', 'Mansoura FC', 339, 704),
        ('Delta East', 'Sharqya', 170, 704),
        ('Upper Egypt', 'Assiut FC', 501, 1124),
        ('Upper Egypt', 'Bani sweif', 401, 1126),
        ('Upper Egypt', 'Menya Samalot', 703, 1123),
        ('Upper Egypt', 'Sohag', 632, 1125),
        ('Alexandria', 'Khorshed Alex', 797, 702),
        ('Giza', 'Sakkarah', 962, 701),
    )
    return pd.DataFrame(list(records), columns=header)

# Build the warehouse mapping once; later cells pass df_whs to the targets and metrics steps.
df_whs = get_warehouse_mapping()
print("Warehouse Mapping:")
# Bare expression so the notebook renders the table as the cell output.
df_whs


In [None]:
# =============================================================================
# PART 2: FETCH COGS DATA
# =============================================================================

def fetch_current_cogs():
    """
    Load the COGS rows whose validity window contains the current timestamp.

    Returns:
        DataFrame with numeric 'product_id' and 'wac_p' columns.
    """
    sql = """
    SELECT product_id, wac_p
    FROM finance.all_cogs
    WHERE CURRENT_TIMESTAMP BETWEEN from_date AND to_date
    """
    cogs = snowflake_query("Egypt", sql)
    # snowflake_query does not guarantee numeric dtypes, so cast explicitly.
    for numeric_col in ('product_id', 'wac_p'):
        cogs[numeric_col] = pd.to_numeric(cogs[numeric_col])
    return cogs

# Run: pull the current COGS snapshot and report how many rows came back.
df_cogs = fetch_current_cogs()
print(f"COGS records: {len(df_cogs)}")


In [None]:
# =============================================================================
# PART 3: FETCH RUNNING RATES DATA
# =============================================================================

def fetch_running_rates():
    """
    Fetch predicted running rates - latest per product/warehouse within 14 days.

    The QUALIFY clause keeps, for each (product_id, warehouse_id), only the row
    carrying the most recent date, and only when that date is no older than
    14 days before the current date.

    Returns:
        DataFrame with 'product_id', 'warehouse_id' and 'rr'; every column that
        can be parsed as numeric is converted, the rest are left untouched.
    """
    query = """
    SELECT product_id, warehouse_id, rr
    FROM finance.PREDICTED_RUNNING_RATES
    QUALIFY MAX(date) OVER (PARTITION BY product_id, warehouse_id) = date
        AND date::DATE >= CURRENT_DATE - 14
    """
    df = snowflake_query("Egypt", query)
    for col in df.columns:
        # Same effect as the deprecated errors='ignore': a column that cannot
        # be parsed is kept exactly as returned.
        try:
            df[col] = pd.to_numeric(df[col])
        except (ValueError, TypeError):
            pass
    return df

# Run: load the latest predicted running rates and report the row count.
df_rr = fetch_running_rates()
print(f"Running rates records: {len(df_rr)}")


In [None]:
# =============================================================================
# PART 4: FETCH STOCKS DATA
# =============================================================================

def fetch_stocks():
    """
    Fetch stock data with running rates and DOH calculation.

    For every basic-unit row of product_warehouse (warehouses 6, 9 and 10
    excluded) the query returns the available stock, the latest running rate
    from the last 14 days (0 when missing) and 'doh' = stock / running rate.
    When the running rate is 0 or missing, 'doh' is set to the stock itself.

    Returns:
        DataFrame with 'warehouse_id', 'product_id', 'stocks', 'rr' and 'doh';
        every column that can be parsed as numeric is converted.
    """
    query = """
    WITH rr AS (
        SELECT product_id, warehouse_id, rr
        FROM finance.PREDICTED_RUNNING_RATES
        QUALIFY MAX(date) OVER (PARTITION BY product_id, warehouse_id) = date
            AND date::DATE >= CURRENT_DATE - 14
    )
    SELECT 
        pw.warehouse_id,
        pw.product_id,
        pw.available_stock::INTEGER AS stocks,
        COALESCE(rr.rr, 0) AS rr,
        CASE WHEN COALESCE(rr.rr, 0) = 0 THEN pw.available_stock::INTEGER 
             ELSE pw.available_stock::INTEGER / rr.rr 
        END AS doh
    FROM product_warehouse pw
    LEFT JOIN rr ON rr.product_id = pw.product_id AND rr.warehouse_id = pw.warehouse_id
    WHERE pw.warehouse_id NOT IN (6, 9, 10)
        AND pw.is_basic_unit = 1
    """
    df = snowflake_query("Egypt", query)
    for col in df.columns:
        # Same effect as the deprecated errors='ignore': a column that cannot
        # be parsed is kept exactly as returned.
        try:
            df[col] = pd.to_numeric(df[col])
        except (ValueError, TypeError):
            pass
    return df

# Run: load per-warehouse stock levels with days-on-hand and report the row count.
df_stocks = fetch_stocks()
print(f"Stock records: {len(df_stocks)}")


In [None]:
# =============================================================================
# PART 5: FETCH SALES DATA
# =============================================================================

def fetch_sales():
    """
    Fetch sales data with aggregations for RR and retailer metrics.

    The inner query builds daily quantity and distinct-retailer counts per
    product/warehouse over the last 150 days (Fridays, order statuses 7 and 12,
    and channels other than telesales/retailer are excluded). The outer query
    reduces them to:
        high_rr / high_rets : 80th percentile of daily qty / retailers,
                              using days older than 3 days ago
        qty_std / rets_std  : standard deviation over the same days (0 if NULL)
        cu_rr / cu_rets     : yesterday's qty / retailers
        today_rr            : today's qty so far

    Returns:
        DataFrame with the identifier, descriptive ('sku', 'brand', 'cat') and
        metric columns above; the id and metric columns are converted to
        numeric where possible.
    """
    query = """
    SELECT 
        warehouse_id, 
        product_id,
        sku,
        brand,
        cat,
        PERCENTILE_CONT(0.8) WITHIN GROUP (ORDER BY CASE WHEN date < CURRENT_DATE - 3 THEN qty END) AS high_rr,
        PERCENTILE_CONT(0.8) WITHIN GROUP (ORDER BY CASE WHEN date < CURRENT_DATE - 3 THEN num_rets END) AS high_rets,
        COALESCE(STDDEV(CASE WHEN date < CURRENT_DATE - 3 THEN qty END), 0) AS qty_std,
        COALESCE(STDDEV(CASE WHEN date < CURRENT_DATE - 3 THEN num_rets END), 0) AS rets_std,
        COALESCE(SUM(CASE WHEN date = CURRENT_DATE - 1 THEN qty END), 0) AS cu_rr,
        COALESCE(SUM(CASE WHEN date = CURRENT_DATE THEN qty END), 0) AS today_rr,
        COALESCE(SUM(CASE WHEN date = CURRENT_DATE - 1 THEN num_rets END), 0) AS cu_rets
    FROM (
        SELECT
            so.created_at::DATE AS date,
            pso.warehouse_id,
            pso.product_id,
            CONCAT(p.name_ar, ' ', p.size, ' ', pu.name_ar) AS sku,
            b.name_ar AS brand, 
            c.name_ar AS cat,
            SUM(pso.purchased_item_count * pso.basic_unit_count) AS qty,
            COUNT(DISTINCT so.retailer_id) AS num_rets
        FROM product_sales_order pso
        JOIN sales_orders so ON so.id = pso.sales_order_id
        JOIN products p ON p.id = pso.product_id
        JOIN brands b ON p.brand_id = b.id 
        JOIN categories c ON p.category_id = c.id
        JOIN product_units pu ON pu.id = p.unit_id
        WHERE so.created_at::DATE BETWEEN CURRENT_DATE - 150 AND CURRENT_DATE 
            AND so.sales_order_status_id NOT IN (7, 12)
            AND so.channel IN ('telesales', 'retailer')
            AND pso.purchased_item_count <> 0
            AND DAYNAME(so.created_at::DATE) <> 'Fri'
        GROUP BY 1, 2, 3, 4, 5, 6
    )
    GROUP BY 1, 2, 3, 4, 5
    """
    df = snowflake_query("Egypt", query)
    # Convert numeric columns
    numeric_cols = ['warehouse_id', 'product_id', 'high_rr', 'high_rets', 'qty_std', 'rets_std', 'cu_rr', 'today_rr', 'cu_rets']
    for col in numeric_cols:
        if col in df.columns:
            # Same effect as the deprecated errors='ignore': a column that
            # cannot be parsed is kept exactly as returned.
            try:
                df[col] = pd.to_numeric(df[col])
            except (ValueError, TypeError):
                pass
    return df

# Run: load the 150-day sales aggregates and report the row count.
df_sales = fetch_sales()
print(f"Sales records: {len(df_sales)}")


In [None]:
# =============================================================================
# PART 6: FETCH PRICES DATA
# =============================================================================

def fetch_prices():
    """
    Load the most recent basic-unit price for each (product, cohort) pair.

    Only the cohorts listed below are queried; "most recent" is decided by
    cohort_pricing_changes.created_at.

    Returns:
        DataFrame with numeric 'cohort_id', 'product_id' and 'price' columns.
    """
    cohort_ids = [700, 701, 702, 703, 704, 696, 695, 698, 697, 699, 1123, 1124, 1125, 1126]
    cohort_str = ', '.join(str(cohort) for cohort in cohort_ids)

    sql = f"""
    SELECT cohort_id, product_id, price
    FROM (
        SELECT 
            cpc.cohort_id,
            pu.product_id,
            cpc.price,
            ROW_NUMBER() OVER (PARTITION BY pu.product_id, cpc.cohort_id ORDER BY cpc.created_at DESC) AS rn
        FROM cohort_pricing_changes cpc 
        JOIN PACKING_UNIT_PRODUCTS pu ON pu.id = cpc.product_packing_unit_id
        WHERE cpc.cohort_id IN ({cohort_str})
            AND pu.is_basic_unit = 1 
    )
    WHERE rn = 1
    """
    prices = snowflake_query("Egypt", sql)
    # Cast every returned column; all three are expected to be numeric.
    for numeric_col in ('cohort_id', 'product_id', 'price'):
        prices[numeric_col] = pd.to_numeric(prices[numeric_col])
    return prices

# Run: load the latest cohort prices and report the row count.
df_prices = fetch_prices()
print(f"Price records: {len(df_prices)}")


In [None]:
# =============================================================================
# PART 7: FETCH MARKETPLACE PRICES DATA
# =============================================================================

def fetch_marketplace_prices():
    """
    Load marketplace min / mod / max prices per region and product.

    Each marketplace price is divided by the packing unit's basic_unit_count
    and then averaged per (region, product_id).

    Returns:
        DataFrame with 'region' plus numeric 'product_id', 'min_price',
        'mod_price' and 'max_price' columns.
    """
    sql = """
    SELECT 
        mp.region,
        mp.product_id,
        AVG(mp.min_price / pup.basic_unit_count) AS min_price,
        AVG(mp.mod_price / pup.basic_unit_count) AS mod_price,
        AVG(mp.max_price / pup.basic_unit_count) AS max_price
    FROM materialized_views.marketplace_prices mp
    JOIN PACKING_UNIT_PRODUCTS pup ON pup.product_id = mp.product_id AND mp.pu_id = pup.packing_unit_id
    GROUP BY 1, 2
    """
    marketplace = snowflake_query("Egypt", sql)
    # 'region' stays textual; everything else is cast to numeric.
    for numeric_col in ('product_id', 'min_price', 'mod_price', 'max_price'):
        marketplace[numeric_col] = pd.to_numeric(marketplace[numeric_col])
    return marketplace

# Run: load marketplace price bands and report the row count.
df_mp = fetch_marketplace_prices()
print(f"Marketplace price records: {len(df_mp)}")


In [None]:
# =============================================================================
# PART 8: FETCH BEN SOLIMAN PRICES
# =============================================================================

def fetch_ben_soliman_prices():
    """
    Load validated Ben Soliman competitor prices, one value per product.

    Reading the nested query from the inside out:
      1. savvy_mapping rows from the last 5 days with a non-null bs_price that
         lies within 30% of current cost (wac_p * basic unit count).
      2. Keep only the latest injection date per (product, packing unit).
      3. Keep the row closest to cost per (product, packing unit).
      4. Convert to a basic-unit price and keep it when it is within +/-50%
         of wac_p.
      5. Keep the row closest to cost per product and average it.
      6. Finally require the price to lie between 0.9x and 1.3x of the
         current wac_p.

    Returns:
        DataFrame whose 'product_id' and 'ben_soliman_price' columns are numeric.
    """
    sql = """
    SELECT z.* 
    FROM (
        SELECT maxab_product_id AS product_id, AVG(bs_final_price) AS ben_soliman_price
        FROM (
            SELECT *, ROW_NUMBER() OVER(PARTITION BY maxab_product_id ORDER BY diff) AS rnk_2
            FROM (
                SELECT *, (bs_final_price - wac_p) / wac_p AS diff_2
                FROM (
                    SELECT *, bs_price / maxab_basic_unit_count AS bs_final_price
                    FROM (
                        SELECT *, ROW_NUMBER() OVER(PARTITION BY maxab_product_id, maxab_pu ORDER BY diff) AS rnk 
                        FROM (
                            SELECT *, MAX(INJECTION_DATE::DATE) OVER(PARTITION BY maxab_product_id, maxab_pu) AS max_date
                            FROM (
                                SELECT sm.*, wac1, wac_p, 
                                    ABS(bs_price - (wac_p * maxab_basic_unit_count)) / (wac_p * maxab_basic_unit_count) AS diff 
                                FROM materialized_views.savvy_mapping sm 
                                JOIN finance.all_cogs f ON f.product_id = sm.maxab_product_id 
                                    AND CURRENT_TIMESTAMP BETWEEN f.from_date AND f.to_date
                                WHERE bs_price IS NOT NULL 
                                    AND INJECTION_DATE::DATE >= CURRENT_TIMESTAMP::DATE - 5 
                                    AND ABS(bs_price - (wac_p * maxab_basic_unit_count)) / (wac_p * maxab_basic_unit_count) < 0.3
                            )
                            QUALIFY max_date = INJECTION_DATE
                        )
                        QUALIFY rnk = 1 
                    )
                )
                WHERE diff_2 BETWEEN -0.5 AND 0.5 
            )
            QUALIFY rnk_2 = 1 
        )
        GROUP BY ALL
    ) z 
    JOIN finance.all_cogs f ON f.product_id = z.product_id 
        AND CURRENT_TIMESTAMP BETWEEN f.from_date AND f.to_date
    WHERE ben_soliman_price BETWEEN f.wac_p * 0.9 AND f.wac_p * 1.3
    """
    competitor = snowflake_query("Egypt", sql)
    for numeric_col in ('product_id', 'ben_soliman_price'):
        competitor[numeric_col] = pd.to_numeric(competitor[numeric_col])
    return competitor

# Run: load validated Ben Soliman competitor prices and report the row count.
df_bsp = fetch_ben_soliman_prices()
print(f"Ben Soliman price records: {len(df_bsp)}")


In [None]:
# =============================================================================
# PART 9: FETCH SCRAPPED/CLEANED MARKET PRICES
# =============================================================================

def fetch_scrapped_prices():
    """
    Load scraped market prices summarised per product and region.

    Only prices from the last 5 days that fall between 0.9x and 1.3x of the
    current wac_p are considered, and for each (region, product, competitor)
    only the rows from its latest date are kept before aggregating.

    Returns:
        DataFrame with 'region' plus numeric 'product_id', 'min_scrapped',
        'max_scrapped' and 'median_scrapped' columns.
    """
    sql = """
    WITH current_cogs AS (
        SELECT product_id, wac_p
        FROM finance.all_cogs
        WHERE CURRENT_TIMESTAMP BETWEEN from_date AND to_date
    )
    SELECT 
        product_id,
        region,
        MIN(market_price) AS min_scrapped,
        MAX(market_price) AS max_scrapped,
        MEDIAN(market_price) AS median_scrapped
    FROM (
        SELECT 
            cmp.product_id,
            cmp.region,
            cmp.market_price
        FROM materialized_views.cleaned_market_prices cmp
        JOIN current_cogs f ON f.product_id = cmp.product_id
        WHERE cmp.date >= CURRENT_DATE - 5
            AND cmp.market_price BETWEEN f.wac_p * 0.9 AND f.wac_p * 1.3
        QUALIFY MAX(cmp.date) OVER (PARTITION BY cmp.region, cmp.product_id, cmp.competitor) = cmp.date
    )
    GROUP BY 1, 2
    """
    scraped = snowflake_query("Egypt", sql)
    # 'region' stays textual; everything else is cast to numeric.
    for numeric_col in ('product_id', 'min_scrapped', 'max_scrapped', 'median_scrapped'):
        scraped[numeric_col] = pd.to_numeric(scraped[numeric_col])
    return scraped

# Run: load the scraped market price summary and report the row count.
df_scrapped = fetch_scrapped_prices()
print(f"Scrapped price records: {len(df_scrapped)}")


In [None]:
# =============================================================================
# PART 10: FETCH TARGETS DATA
# =============================================================================

def fetch_cat_brand_targets():
    """
    Load NMV and margin targets per category, brand and month.

    Rows of performance.commercial_targets dated on or after the first day of
    the current month are grouped by (cat, brand, month); NMV is summed and
    margin is averaged.

    NOTE(review): because the date filter has no upper bound, a (cat, brand)
    pair can come back once per month if later months are already planned —
    confirm whether downstream joins on (cat, brand) expect a single row.

    Returns:
        DataFrame with 'cat', 'brand', 'month_date' and numeric 'target_nmv'
        and 'target_bm' columns.
    """
    sql = """
    SELECT 
        cat, 
        brand, 
        SUM(nmv) AS target_nmv, 
        AVG(margin) AS target_bm,
        DATE_TRUNC('month', DATE) AS month_date
    FROM performance.commercial_targets
    WHERE cat IS NOT NULL AND brand IS NOT NULL 
        AND date >= DATE_TRUNC('month', CURRENT_DATE)
    GROUP BY ALL
    """
    targets = snowflake_query("Egypt", sql)
    for numeric_col in ('target_nmv', 'target_bm'):
        targets[numeric_col] = pd.to_numeric(targets[numeric_col])
    return targets

def fetch_cat_targets(df_cat_brand_targets):
    """
    Calculate category-level targets from brand targets.

    For each category the target margin is the NMV-weighted average of the
    brand margins: sum(target_bm * target_nmv) / sum(target_nmv). Categories
    whose total target NMV is not positive get a margin of 0.

    Args:
        df_cat_brand_targets: DataFrame with at least 'cat', 'target_nmv' and
            'target_bm' columns. It is not modified.

    Returns:
        DataFrame with columns 'cat' and 'cat_target_margin', one row per
        category; an empty input yields an empty frame with those columns.
    """
    df = df_cat_brand_targets.assign(
        weighted_margin=df_cat_brand_targets['target_bm'] * df_cat_brand_targets['target_nmv']
    )
    # Aggregate both sums in one vectorized pass instead of a per-group lambda;
    # this also keeps the result shape stable when there are no rows at all.
    totals = df.groupby('cat')[['weighted_margin', 'target_nmv']].sum()
    # Blank out non-positive denominators so those categories fall back to 0.
    positive_nmv = totals['target_nmv'].where(totals['target_nmv'] > 0)
    margin = (totals['weighted_margin'] / positive_nmv).fillna(0)
    return margin.rename('cat_target_margin').reset_index()

# Run: fetch brand-level targets, then derive the per-category fallback margins from them.
df_cat_brand_targets = fetch_cat_brand_targets()
df_cat_targets = fetch_cat_targets(df_cat_brand_targets)
print(f"Cat/Brand target records: {len(df_cat_brand_targets)}")


In [None]:
# =============================================================================
# PART 11: FETCH DISCOUNTED SALES DATA
# =============================================================================

def fetch_discounted_sales():
    """
    Fetch yesterday's discounted sales breakdown.

    Per (warehouse_id, product_id) the query sums yesterday's total_price
    overall and separately for lines that carry a dynamic bundle, an SKU
    discount or a quantity discount. The three breakdown sums are NULL when
    no line of that kind exists. Order statuses 7 and 12 and channels other
    than telesales/retailer are excluded.

    Returns:
        DataFrame with 'warehouse_id', 'product_id', 'total_nmv', 'bundle_nmv',
        'sku_discount_nmv' and 'quantity_nmv'; every column that can be parsed
        as numeric is converted.
    """
    query = """
    SELECT  
        pso.warehouse_id,
        pso.product_id,
        SUM(pso.total_price) AS total_nmv,
        SUM(CASE WHEN pso.dynamic_bundle_sales_order_id IS NOT NULL THEN pso.total_price END) AS bundle_nmv,
        SUM(CASE WHEN pso.sku_discount_id IS NOT NULL THEN pso.total_price END) AS sku_discount_nmv,
        SUM(CASE WHEN pso.quantity_discount_id IS NOT NULL THEN pso.total_price END) AS quantity_nmv
    FROM product_sales_order pso 
    JOIN sales_orders so ON so.id = pso.sales_order_id
    WHERE so.created_at::DATE = CURRENT_DATE - 1 
        AND so.sales_order_status_id NOT IN (7, 12)
        AND so.channel IN ('telesales', 'retailer')
        AND pso.purchased_item_count <> 0
    GROUP BY 1, 2
    """
    df = snowflake_query("Egypt", query)
    for col in df.columns:
        # Same effect as the deprecated errors='ignore': a column that cannot
        # be parsed is kept exactly as returned.
        try:
            df[col] = pd.to_numeric(df[col])
        except (ValueError, TypeError):
            pass
    return df

# Run: load yesterday's discounted-sales breakdown and report the row count.
df_discounted = fetch_discounted_sales()
print(f"Discounted sales records: {len(df_discounted)}")


In [None]:
# =============================================================================
# PART 12: FETCH COMMERCIAL CONSTRAINTS (MIN PRICES)
# =============================================================================

def fetch_commercial_constraints():
    """
    Load the latest commercial minimum price per product and region.

    Only non-deleted rows of finance.minimum_prices are considered. The
    created_at window starts at the first day of the current month (or of the
    previous month while the day of month is below 7) and ends 6 days after
    the first day of next month. Within that window the row(s) with the
    newest created_at per (product_id, region) are returned.

    Returns:
        DataFrame with 'region' plus numeric 'product_id' and 'min_price'.
    """
    sql = """
    SELECT product_id, region, min_price
    FROM (
        SELECT 
            product_id, 
            region, 
            min_price,
            created_at,
            MAX(created_at) OVER (PARTITION BY product_id, region) AS max_created
        FROM finance.minimum_prices
        WHERE is_deleted = 'false'
            AND created_at BETWEEN 
                CASE WHEN DATE_PART('day', CURRENT_DATE) < 7 
                     THEN DATE_TRUNC('month', CURRENT_DATE - INTERVAL '1 month') 
                     ELSE DATE_TRUNC('month', CURRENT_DATE)
                END
                AND DATE_TRUNC('month', CURRENT_DATE) + INTERVAL '1 month' + INTERVAL '6 days'
    )
    WHERE created_at = max_created
    """
    constraints = snowflake_query("Egypt", sql)
    for numeric_col in ('product_id', 'min_price'):
        constraints[numeric_col] = pd.to_numeric(constraints[numeric_col])
    return constraints

# Run: load the commercial minimum-price constraints and report the row count.
df_commercial = fetch_commercial_constraints()
print(f"Commercial constraint records: {len(df_commercial)}")


In [None]:
# =============================================================================
# PART 13: FETCH TARGETS DATA (COMPLEX - WAREHOUSE SKU TARGETS)
# =============================================================================

def fetch_targets_data(df_whs):
    """
    Fetch complex targets data with warehouse-level SKU targets.

    The query splits the month-to-date commercial target of each
    (region, cat, brand) down to SKU and warehouse level using sales shares
    from the period starting three months back (month start) up to yesterday:
        region_sku_target = target * SKU share within region/cat/brand
        wh_sku_target     = region_sku_target * warehouse share of the SKU
        sales             = current-month NMV of the SKU in the warehouse
        rem_nmv           = wh_sku_target - sales
    Regions are coarsened inside the query: any region containing 'Delta'
    becomes 'Delta', and Cairo/Giza become 'Greater Cairo'.

    Args:
        df_whs: Warehouse mapping DataFrame. Kept for interface compatibility.
            NOTE(review): the query does not read it — the same mapping is
            hard-coded in the `whs` CTE below and must be kept in sync with
            get_warehouse_mapping() by hand.

    Returns:
        DataFrame with 'region', 'warehouse_id', 'product_id', 'cat', 'brand',
        'region_sku_target', 'wh_sku_target', 'sales' and 'rem_nmv'; every
        column that can be parsed as numeric is converted.
    """
    query = """
    WITH whs AS (
        SELECT *
        FROM (VALUES
            ('Cairo', 'Mostorod', 1, 700),
            ('Giza', 'Barageel', 236, 701),
            ('Delta West', 'El-Mahala', 337, 703),
            ('Delta West', 'Tanta', 8, 703),
            ('Delta East', 'Mansoura FC', 339, 704),
            ('Delta East', 'Sharqya', 170, 704),
            ('Upper Egypt', 'Assiut FC', 501, 1124),
            ('Upper Egypt', 'Bani sweif', 401, 1126),
            ('Upper Egypt', 'Menya Samalot', 703, 1123),
            ('Upper Egypt', 'Sohag', 632, 1125),
            ('Alexandria', 'Khorshed Alex', 797, 702),
            ('Giza', 'Sakkarah', 962, 701)
        ) x(region, wh, warehouse_id, cohort_id)
    ),
    base_sales AS (
        SELECT
            CASE WHEN whs.region LIKE '%Delta%' THEN 'Delta' 
                 WHEN whs.region = 'Cairo' OR whs.region = 'Giza' THEN 'Greater Cairo' 
                 ELSE whs.region 
            END AS region,
            pso.warehouse_id,
            pso.product_id,
            c.name_ar AS cat,
            b.name_ar AS brand,
            SUM(pso.total_price) AS nmv,
            so.created_at::DATE AS sale_date
        FROM product_sales_order pso
        JOIN sales_orders so ON so.id = pso.sales_order_id
        JOIN products p ON p.id = pso.product_id
        JOIN categories c ON c.id = p.category_id
        JOIN brands b ON b.id = p.brand_id
        JOIN whs ON whs.warehouse_id = pso.warehouse_id
        WHERE so.sales_order_status_id NOT IN (7, 12)
            AND pso.purchased_item_count <> 0
            AND so.channel IN ('retailer', 'telesales')
            AND so.created_at::DATE BETWEEN DATE_TRUNC('month', CURRENT_DATE - INTERVAL '3 month') AND CURRENT_DATE - 1
        GROUP BY 1, 2, 3, 4, 5, 7
    ),
    region_product_nmv AS (
        SELECT region, product_id, cat, brand, SUM(nmv) AS region_product_nmv
        FROM base_sales
        GROUP BY 1, 2, 3, 4
    ),
    warehouse_contribution AS (
        SELECT 
            bs.region,
            bs.warehouse_id,
            bs.product_id,
            bs.cat,
            bs.brand,
            SUM(bs.nmv) AS warehouse_nmv,
            SUM(bs.nmv) / NULLIF(rpn.region_product_nmv, 0) AS wh_cntrb_in_region
        FROM base_sales bs
        JOIN region_product_nmv rpn ON rpn.region = bs.region 
            AND rpn.product_id = bs.product_id
        GROUP BY 1, 2, 3, 4, 5, rpn.region_product_nmv
    ),
    region_sku_cntrb AS (
        SELECT region, product_id, cat, brand,
            SUM(region_product_nmv) / SUM(SUM(region_product_nmv)) OVER (PARTITION BY region, cat, brand) AS sku_cntrb
        FROM region_product_nmv
        GROUP BY 1, 2, 3, 4
    ),
    comm_plan AS (
        SELECT
            CASE WHEN city = 'Alex' THEN 'Alexandria' ELSE city END AS region,
            cat, brand,
            SUM(nmv) AS target
        FROM performance.commercial_targets
        WHERE date BETWEEN DATE_TRUNC('month', CURRENT_DATE) AND CURRENT_DATE - 1
        GROUP BY 1, 2, 3
    ),
    current_month_sales AS (
        SELECT region, warehouse_id, product_id, SUM(nmv) AS nmv
        FROM base_sales
        WHERE sale_date >= DATE_TRUNC('month', CURRENT_DATE)
        GROUP BY 1, 2, 3
    )
    SELECT 
        wc.region,
        wc.warehouse_id,
        wc.product_id,
        wc.cat,
        wc.brand,
        cp.target * rsc.sku_cntrb AS region_sku_target,
        cp.target * rsc.sku_cntrb * wc.wh_cntrb_in_region AS wh_sku_target,
        COALESCE(cms.nmv, 0) AS sales,
        cp.target * rsc.sku_cntrb * wc.wh_cntrb_in_region - COALESCE(cms.nmv, 0) AS rem_nmv
    FROM warehouse_contribution wc
    JOIN region_sku_cntrb rsc ON rsc.region = wc.region 
        AND rsc.product_id = wc.product_id
    JOIN comm_plan cp ON cp.region = wc.region 
        AND cp.cat = wc.cat 
        AND cp.brand = wc.brand
    LEFT JOIN current_month_sales cms ON cms.product_id = wc.product_id 
        AND cms.warehouse_id = wc.warehouse_id
        AND cms.region = wc.region
    """
    df = snowflake_query("Egypt", query)
    for col in df.columns:
        # Same effect as the deprecated errors='ignore': a column that cannot
        # be parsed is kept exactly as returned.
        try:
            df[col] = pd.to_numeric(df[col])
        except (ValueError, TypeError):
            pass
    return df

# Run: load warehouse-level SKU targets and report the row count.
df_targets = fetch_targets_data(df_whs)
print(f"Targets data records: {len(df_targets)}")


In [None]:
# =============================================================================
# PART 14: BUILD PRODUCT METRICS (MERGE ALL DATA)
# =============================================================================

def build_product_metrics(df_stocks, df_sales, df_whs, df_prices, df_cogs, 
                          df_mp, df_bsp, df_scrapped, df_cat_brand_targets, 
                          df_cat_targets, df_discounted):
    """
    Assemble one row per product/warehouse from all fetched data sources.

    Stocks, sales, warehouse mapping, prices and COGS are inner-joined, so a
    product/warehouse must be present in all of them to survive. Market
    prices, targets and discounted sales are left-joined and may be NaN.

    Derived columns:
        bm            : (price - wac_p) / price, NaN when price is 0
        in_stock_perc : 1 when stocks > 0, else 0
        target_margin : brand-level target_bm, falling back to the
                        category-level cat_target_margin

    NOTE(review): if df_cat_brand_targets holds several distinct target_bm
    values for the same (cat, brand), that left join multiplies product rows.

    Returns:
        DataFrame restricted to rows with price > 0 and high_rr > 0; the
        index of the merged frame is kept as-is (not reset).
    """
    # Mandatory sources: each inner join narrows the set of product/warehouses.
    metrics = (
        df_stocks
        .merge(df_sales, on=['product_id', 'warehouse_id'], how='inner')
        .merge(df_whs, on='warehouse_id', how='inner')
        .merge(df_prices, on=['product_id', 'cohort_id'], how='inner')
        .merge(df_cogs, on='product_id', how='inner')
    )

    zero_safe_price = metrics['price'].replace(0, np.nan)
    metrics['bm'] = (metrics['price'] - metrics['wac_p']) / zero_safe_price
    metrics['in_stock_perc'] = (metrics['stocks'] > 0).astype(int)

    # Optional market references; prefixed so they cannot clash with 'price'.
    marketplace = df_mp.rename(columns={
        'min_price': 'mp_min_price',
        'mod_price': 'mp_mod_price',
        'max_price': 'mp_max_price',
    })
    brand_targets = df_cat_brand_targets[['cat', 'brand', 'target_bm']].drop_duplicates()

    metrics = (
        metrics
        .merge(marketplace, on=['product_id', 'region'], how='left')
        .merge(df_bsp[['product_id', 'ben_soliman_price']], on='product_id', how='left')
        .merge(df_scrapped, on=['product_id', 'region'], how='left')
        .merge(brand_targets, on=['cat', 'brand'], how='left')
        .merge(df_cat_targets, on='cat', how='left')
    )

    # Prefer the brand-level target; use the category-level one when missing.
    metrics['target_margin'] = metrics['target_bm'].fillna(metrics['cat_target_margin'])

    metrics = metrics.merge(df_discounted, on=['warehouse_id', 'product_id'], how='left')

    # Only priced products with a positive reference running rate are scored.
    sellable = (metrics['price'] > 0) & (metrics['high_rr'] > 0)
    return metrics[sellable]

# Run after fetching all data: every df_* argument below is produced by an earlier cell,
# so those cells must have been executed first.
df_metrics = build_product_metrics(
    df_stocks, df_sales, df_whs, df_prices, df_cogs,
    df_mp, df_bsp, df_scrapped, df_cat_brand_targets,
    df_cat_targets, df_discounted
)
print(f"Product metrics records: {len(df_metrics)}")


In [None]:
# =============================================================================
# PART 15: SCORING AND CLASSIFICATION
# =============================================================================

def add_scoring_classification(df, df_commercial):
    """
    Attach commercial constraints, market aggregates and status labels.

    Args:
        df: Product metrics frame (output of build_product_metrics). It is
            copied, not modified.
        df_commercial: Commercial minimum prices with 'product_id', 'region'
            and 'min_price'.

    Added columns:
        region_mapped          : region with Cairo/Giza folded into 'Greater Cairo'
        commercial_min         : matching commercial minimum price (may be NaN)
        region_comm            : region column carried over from df_commercial
        offers_perc            : discounted NMV / total NMV
        combined_min_market,
        combined_max_market,
        combined_median_market : row-wise min / max / mean over the marketplace,
                                 Ben Soliman and scraped prices
        mp_price_score         : position of price inside the marketplace
                                 min-max range
        stock_comment, rr_comment, rets_comment : text labels

    NOTE(review): only Cairo and Giza are remapped here; 'Delta West' /
    'Delta East' are passed through unchanged — confirm this matches the
    region names used in the commercial constraints table.

    Returns:
        New DataFrame with the columns above appended.
    """
    scored = df.copy()

    # Commercial constraints are looked up under 'Greater Cairo' for both cities.
    is_greater_cairo = scored['region'].isin(['Cairo', 'Giza'])
    scored['region_mapped'] = scored['region'].mask(is_greater_cairo, 'Greater Cairo')

    constraints = df_commercial.rename(columns={'min_price': 'commercial_min'})
    scored = scored.merge(
        constraints,
        left_on=['product_id', 'region_mapped'],
        right_on=['product_id', 'region'],
        how='left',
        suffixes=('', '_comm'),
    )

    # Share of yesterday's NMV that was sold under any offer type.
    discounted_nmv = (
        scored['bundle_nmv'].fillna(0)
        + scored['sku_discount_nmv'].fillna(0)
        + scored['quantity_nmv'].fillna(0)
    )
    scored['offers_perc'] = discounted_nmv / scored['total_nmv'].replace(0, np.nan)

    # Row-wise aggregates across the three market sources (NaNs are skipped).
    floor_sources = ['mp_min_price', 'ben_soliman_price', 'min_scrapped']
    ceiling_sources = ['mp_max_price', 'ben_soliman_price', 'max_scrapped']
    median_sources = ['mp_mod_price', 'ben_soliman_price', 'median_scrapped']
    scored['combined_min_market'] = scored[floor_sources].min(axis=1)
    scored['combined_max_market'] = scored[ceiling_sources].max(axis=1)
    scored['combined_median_market'] = scored[median_sources].mean(axis=1, skipna=True)

    # A zero-width marketplace range would divide by zero, so it becomes NaN.
    mp_range = (scored['mp_max_price'] - scored['mp_min_price']).replace(0, np.nan)
    scored['mp_price_score'] = (scored['price'] - scored['mp_min_price']) / mp_range

    def _stock_label(row):
        # Out-of-stock wins over any days-on-hand reading.
        if row['in_stock_perc'] == 0:
            return 'OOS'
        if row['doh'] > 30:
            return 'Over Stocked'
        if row['doh'] < 1:
            return 'low stock'
        return 'Good stocks'

    def _band_label(current, reference, spread, labels):
        # Classify `current` against bands of +/-0.5 and +1.5 spreads around
        # `reference`; falls through to '' when no comparison holds (e.g. NaN).
        normal, low, high, very_high = labels
        lower = reference - 0.5 * spread
        upper = reference + 0.5 * spread
        if lower <= current <= upper:
            return normal
        if current < lower:
            return low
        ceiling = reference + 1.5 * spread
        if upper <= current <= ceiling:
            return high
        if current > ceiling:
            return very_high
        return ''

    rr_labels = ('Normal rr', 'low rr', 'High rr', 'Very High rr')
    rets_labels = ('Normal rets', 'low rets', 'High rets', 'Very High rets')

    scored['stock_comment'] = scored.apply(_stock_label, axis=1)
    scored['rr_comment'] = scored.apply(
        lambda row: _band_label(row['cu_rr'], row['high_rr'], row['qty_std'], rr_labels),
        axis=1,
    )
    scored['rets_comment'] = scored.apply(
        lambda row: _band_label(row['cu_rets'], row['high_rets'], row['rets_std'], rets_labels),
        axis=1,
    )

    return scored


In [None]:
# =============================================================================
# PART 16: FINAL SCORING - MARKET POSITION & PRICE COMMENTS
# =============================================================================

def add_final_scoring(df):
    """
    Add final scoring columns: combined_price_score, market_position_status, price_comment.

    Args:
        df: DataFrame with one row per product/warehouse. The columns read are
            price, bm, target_margin, mp_price_score, mp_min_price,
            ben_soliman_price, median_scrapped, combined_min_market,
            combined_median_market and combined_max_market.

    Returns:
        A copy of ``df`` with three extra columns:

        - combined_price_score: (price - min) / (max - min) over the combined
          market range, falling back to mp_price_score when that range is not
          usable. Values below 0 or above 1 mean the price lies outside the range.
        - market_position_status: label comparing price with the combined
          min / median / max using a +/-5% band.
        - price_comment: label combining the sign of the price score with
          bm versus target_margin.
    """
    df = df.copy()

    # Combined price score
    def calc_combined_price_score(row):
        combined_max = row['combined_max_market']
        combined_min = row['combined_min_market']

        # A max of 0 and a min of 1e9 are treated as "no market data"
        # placeholders, so a range touching either one is not usable.
        range_is_usable = (
            pd.notna(combined_max) and combined_max > 0
            and pd.notna(combined_min) and combined_min < 1e9
            and combined_max != combined_min
        )
        if range_is_usable:
            return (row['price'] - combined_min) / (combined_max - combined_min)
        return row['mp_price_score']

    df['combined_price_score'] = df.apply(calc_combined_price_score, axis=1)

    # Market position status
    def get_market_position(row):
        price = row['price']
        combined_min = row['combined_min_market']
        combined_median = row['combined_median_market']
        combined_max = row['combined_max_market']

        # Check if no market data
        if (pd.isna(combined_median) and pd.isna(row['mp_min_price']) and
                pd.isna(row['ben_soliman_price']) and pd.isna(row['median_scrapped'])):
            return 'No Market Data'

        # Ignore placeholder bounds (same convention as the price score)
        min_val = combined_min if pd.notna(combined_min) and combined_min < 1e9 else None
        max_val = combined_max if pd.notna(combined_max) and combined_max > 0 else None

        # Checks run from the bottom of the range upwards; the first band
        # that contains the price wins.
        if min_val is not None:
            if price < min_val * 0.95:
                return 'Below Market'
            elif price <= min_val * 1.05:
                return 'At Market Min'

        if pd.notna(combined_median):
            if price < combined_median * 0.95:
                return 'Below Median'
            elif price <= combined_median * 1.05:
                return 'At Median'

        if max_val is not None:
            if price < max_val * 0.95:
                return 'Above Median'
            elif price <= max_val * 1.05:
                return 'At Market Max'
            elif price > max_val * 1.05:
                return 'Above Market'

        # Reached when no band matched (e.g. a missing price, for which
        # every comparison above is False).
        return 'At Median'

    df['market_position_status'] = df.apply(get_market_position, axis=1)

    # Price comment
    def get_price_comment(row):
        bm = row['bm']
        target = row['target_margin']

        # Use the same guarded score as the combined_price_score column.
        # Computing it from raw bounds would turn the 1e9 / 0 placeholders
        # into a spurious positive score for rows without market data.
        price_score = calc_combined_price_score(row)

        if pd.isna(price_score):
            if pd.notna(bm) and pd.notna(target):
                return 'below target' if bm < target else 'above target'
            return 'above target'

        if price_score > 0 and bm > target:
            return 'High price'
        elif price_score > 0 and bm < target:
            return 'Credit note'
        elif price_score < 0 and bm < target:
            return 'Low Price'
        elif price_score < 0 and bm > target:
            return 'room to reduce'
        elif bm < target:
            return 'below target'
        else:
            return 'above target'

    df['price_comment'] = df.apply(get_price_comment, axis=1)

    return df


In [None]:
# =============================================================================
# PART 17: ACTION CLASSIFICATION LOGIC
# =============================================================================

def determine_action(row):
    """
    Determine recommended action based on stock, price, and RR status.
    This replicates the complex CASE statement in the final SELECT.

    Args:
        row: Mapping-like record (a DataFrame row or a dict) providing
            stock_comment, price_comment, rr_comment, bm, target_margin,
            cu_rr, today_rr and stocks, plus the optional keys offers_perc
            and commercial_min.

    Returns:
        The action label as a string, or None when no rule matches.
        Label spelling and capitalisation are kept exactly as they were
        (e.g. both 'increase price' and 'Increase price' exist) because other
        cells match on these strings.
    """
    stock_comment = row['stock_comment']
    price_comment = row['price_comment']
    rr_comment = row['rr_comment']
    # A missing offers share counts as "no offers". `value or 0` is not enough
    # here: NaN is truthy, so it would survive and make every `< 0.1` test False.
    offers_perc = row.get('offers_perc', 0)
    if pd.isna(offers_perc):
        offers_perc = 0
    commercial_min = row.get('commercial_min')
    bm = row['bm']
    target = row['target_margin']
    cu_rr = row['cu_rr']
    today_rr = row['today_rr']
    stocks = row['stocks']

    # OOS
    if stock_comment == 'OOS':
        return 'purchase'

    # Good stocks scenarios
    if stock_comment == 'Good stocks':
        if price_comment in ['Low Price', 'below target'] and rr_comment == 'low rr':
            if offers_perc < 0.1:
                return 'offers & Credit Note'
            return 'Credit Note'
        if price_comment in ['Low Price', 'below target'] and rr_comment != 'low rr':
            return 'increase price'
        if price_comment == 'Credit note' and rr_comment == 'low rr':
            return 'Credit Note'
        if price_comment in ['High price', 'room to reduce', 'above target'] and rr_comment == 'low rr':
            if pd.isna(commercial_min):
                return 'Reduce price'
            return 'Remove commercial min'
        if rr_comment == 'Normal rr':
            return 'No action'
        if rr_comment == 'Very High rr' and bm < target:
            return 'Increase price'
        if rr_comment in ['Very High rr', 'High rr'] and bm >= target:
            return 'No action'
        if rr_comment == 'High rr' and bm < target:
            return 'Increase price a bit'
        # NOTE(review): both 'Low Price' and 'below target' already returned
        # in the first two rules of this section, so this looks unreachable.
        if price_comment in ['Low Price', 'below target'] and offers_perc > 0.1:
            return 'Credit Note'

    # Low stock scenarios
    if stock_comment == 'low stock':
        if price_comment == 'Credit note' and rr_comment == 'low rr':
            return 'Credit Note & Purchase'
        if price_comment in ['below target', 'Low Price'] and rr_comment == 'low rr':
            if offers_perc < 0.1:
                return 'Offers & Purchase'
            return 'Purchase'
        if price_comment in ['High price', 'above target', 'room to reduce'] and rr_comment == 'low rr':
            if pd.isna(commercial_min):
                return 'Purchase & reduce price'
            return 'Purchase & Remove commercial min'
        if rr_comment in ['High rr', 'Normal rr']:
            return 'Purchase'
        if rr_comment == 'Very High rr':
            return 'increase price'

    # Over stocked scenarios
    if stock_comment == 'Over Stocked':
        if price_comment in ['below target', 'Low Price', 'Credit note'] and rr_comment == 'low rr':
            return 'Credit Note'
        if price_comment in ['High price', 'above target', 'room to reduce'] and rr_comment == 'low rr':
            if cu_rr > 0:
                if pd.isna(commercial_min):
                    return 'Reduce price'
                return 'Remove commercial min'
            elif today_rr == 0:
                return 'Check activation'
            else:
                return 'Remove commercial min'
        # stocks / cu_rr is the cover in days; a rate of 0 is replaced by 1
        # to avoid dividing by zero.
        if price_comment in ['below target', 'Low Price', 'Credit note'] and rr_comment in ['Very High rr', 'High rr', 'Normal rr']:
            if stocks / (cu_rr if cu_rr > 0 else 1) < 30:
                return 'No Action'
            return 'Credit Note'
        if price_comment in ['High price', 'above target'] and rr_comment in ['Very High rr', 'High rr', 'Normal rr']:
            if stocks / (cu_rr if cu_rr > 0 else 1) < 30:
                return 'No Action'
            return 'Reduce Price'

    # Additional edge cases
    # NOTE(review): every stock section above already returns for this
    # price/RR combination, so these rules only fire for a stock_comment
    # outside the four handled values - confirm against the SQL.
    if price_comment in ['below target', 'Low Price'] and rr_comment == 'low rr':
        if cu_rr == 0 and today_rr > 0:
            return 'No action'
        elif cu_rr == 0:
            return 'Check activation'

    return None

def add_actions(df):
    """
    Attach the recommended action and the per-team flags.

    Works on a copy of ``df``. The ``action`` column holds the result of
    ``determine_action`` for each row. Each team column is 1 when the
    lower-cased action text contains one of that team's keywords and None
    otherwise (including when there is no action).
    """
    result = df.copy()

    # Row-wise classification
    result['action'] = result.apply(determine_action, axis=1)

    def flag_when_mentions(*keywords):
        """Build a mapper returning 1 if an action mentions any keyword, else None."""
        def flag(action):
            if not pd.notna(action):
                return None
            text = str(action).lower()
            for keyword in keywords:
                if keyword in text:
                    return 1
            return None
        return flag

    # Team routing is purely keyword based on the action label
    team_keywords = (
        ('pricing_team', ('price', 'offers')),
        ('purchase_team', ('purchase',)),
        ('commercial_team', ('credit note', 'commercial min', 'activation')),
    )
    for team_column, keywords in team_keywords:
        result[team_column] = result['action'].apply(flag_when_mentions(*keywords))

    return result


In [None]:
# =============================================================================
# PART 18: FINALIZE OUTPUT & ADD CALCULATED COLUMNS
# =============================================================================

def finalize_output(df, df_targets):
    """
    Finalize the output DataFrame with all calculated columns.
    Add stock value, stock contribution, and join targets data.

    Args:
        df: Scored frame with one row per warehouse/product. Must contain
            stocks, price, warehouse_id, product_id, high_rr,
            combined_min_market and combined_max_market.
        df_targets: Frame with warehouse_id, product_id, wh_sku_target and
            rem_nmv; left-joined on (warehouse_id, product_id).
            NOTE(review): duplicate keys here would duplicate output rows -
            presumably the keys are unique; verify against the query.

    Returns:
        A new DataFrame restricted to the known output columns that exist,
        sorted by high_rr * price descending, with 'wh' renamed to
        'warehouse_name' and 'cu_rr' renamed to 'current_rr'.
    """
    df = df.copy()

    # Calculate stock value
    df['stock_value'] = df['stocks'] * df['price']

    # Calculate stock contribution per warehouse
    def share_of_warehouse(values):
        total = values.sum()
        # A warehouse with no positive stock value contributes 0 everywhere
        return values / total if total > 0 else 0

    df['stock_cntrb'] = df.groupby('warehouse_id')['stock_value'].transform(share_of_warehouse)

    # Join targets data
    df = df.merge(
        df_targets[['warehouse_id', 'product_id', 'wh_sku_target', 'rem_nmv']],
        on=['warehouse_id', 'product_id'],
        how='left'
    )

    # Blank out placeholder market bounds: infinities plus the 1e9 "no minimum"
    # and 0 "no maximum" sentinels become NaN in the final output.
    df['combined_min_market'] = df['combined_min_market'].replace([np.inf, -np.inf, 1e9], np.nan)
    df['combined_max_market'] = df['combined_max_market'].replace([0, np.inf, -np.inf], np.nan)

    # Select and order final columns
    final_columns = [
        'region', 'wh', 'warehouse_id', 'product_id', 'sku', 'cat', 'brand',
        'stocks', 'doh', 'stock_comment', 'price', 'bm', 'target_margin', 'price_comment',
        'mp_min_price', 'mp_mod_price', 'mp_max_price', 'ben_soliman_price',
        'min_scrapped', 'median_scrapped', 'max_scrapped',
        'combined_min_market', 'combined_median_market', 'combined_max_market',
        'mp_price_score', 'combined_price_score', 'market_position_status',
        'high_rr', 'cu_rr', 'today_rr', 'rr_comment',
        'high_rets', 'cu_rets', 'rets_comment', 'offers_perc', 'commercial_min',
        'action', 'pricing_team', 'purchase_team', 'commercial_team',
        'stock_value', 'stock_cntrb', 'wh_sku_target', 'rem_nmv'
    ]

    # Keep only columns that exist
    existing_cols = [c for c in final_columns if c in df.columns]

    # Sort by high_rr * price descending. The temporary key is attached with
    # assign() so nothing is written onto the column-subset frame (that
    # chained assignment is what triggers SettingWithCopyWarning).
    df = (
        df[existing_cols]
        .assign(_sort_key=df['high_rr'] * df['price'])
        .sort_values('_sort_key', ascending=False)
        .drop('_sort_key', axis=1)
    )

    # Rename 'wh' -> 'warehouse_name' and 'cu_rr' -> 'current_rr' for clarity
    df = df.rename(columns={'wh': 'warehouse_name', 'cu_rr': 'current_rr'})

    return df


In [None]:
# =============================================================================
# PART 19: MAIN EXECUTION - RUN THE COMPLETE ANALYSIS
# =============================================================================

def run_pricing_status_analysis():
    """
    Run the whole pricing status pipeline end to end.

    Loads every input (twelve numbered fetch steps), then runs the five
    processing stages A-E: build metrics, scoring/classification, final
    scoring, action assignment, and output finalisation. Progress is printed
    as each step completes.

    Returns:
        DataFrame with all pricing status metrics and recommended actions.
    """
    print("=" * 60)
    print("PRICING STATUS ANALYSIS")
    print("=" * 60)

    # ---- Inputs -------------------------------------------------------------

    # 1. Warehouse mapping
    print("\n[1/12] Loading warehouse mapping...")
    warehouses = get_warehouse_mapping()
    print(f"       ‚úì {len(warehouses)} warehouses loaded")

    # 2. COGS
    print("\n[2/12] Fetching COGS data...")
    cogs = fetch_current_cogs()
    print(f"       ‚úì {len(cogs)} COGS records fetched")

    # 3. Stocks
    print("\n[3/12] Fetching stock data...")
    stocks = fetch_stocks()
    print(f"       ‚úì {len(stocks)} stock records fetched")

    # 4. Sales
    print("\n[4/12] Fetching sales data...")
    sales = fetch_sales()
    print(f"       ‚úì {len(sales)} sales records fetched")

    # 5. Own prices
    print("\n[5/12] Fetching price data...")
    prices = fetch_prices()
    print(f"       ‚úì {len(prices)} price records fetched")

    # 6. Marketplace prices
    print("\n[6/12] Fetching marketplace prices...")
    marketplace = fetch_marketplace_prices()
    print(f"       ‚úì {len(marketplace)} marketplace price records fetched")

    # 7. Ben Soliman prices
    print("\n[7/12] Fetching Ben Soliman prices...")
    ben_soliman = fetch_ben_soliman_prices()
    print(f"       ‚úì {len(ben_soliman)} Ben Soliman price records fetched")

    # 8. Scrapped market prices
    print("\n[8/12] Fetching scrapped market prices...")
    scrapped = fetch_scrapped_prices()
    print(f"       ‚úì {len(scrapped)} scrapped price records fetched")

    # 9. Margin targets (category targets are derived from the cat/brand ones)
    print("\n[9/12] Fetching category/brand targets...")
    cat_brand_targets = fetch_cat_brand_targets()
    cat_targets = fetch_cat_targets(cat_brand_targets)
    print(f"       ‚úì {len(cat_brand_targets)} category/brand targets fetched")

    # 10. Discounted sales
    print("\n[10/12] Fetching discounted sales data...")
    discounted = fetch_discounted_sales()
    print(f"       ‚úì {len(discounted)} discounted sales records fetched")

    # 11. Commercial constraints
    print("\n[11/12] Fetching commercial constraints...")
    commercial = fetch_commercial_constraints()
    print(f"       ‚úì {len(commercial)} commercial constraint records fetched")

    # 12. Warehouse/SKU targets (needs the warehouse mapping)
    print("\n[12/12] Fetching warehouse SKU targets...")
    sku_targets = fetch_targets_data(warehouses)
    print(f"       ‚úì {len(sku_targets)} target records fetched")

    # ---- Processing ---------------------------------------------------------
    print("\n" + "-" * 60)
    print("PROCESSING DATA...")
    print("-" * 60)

    # A. One row per product/warehouse with all raw metrics
    print("\n[A] Building product metrics...")
    metrics = build_product_metrics(
        stocks, sales, warehouses, prices, cogs,
        marketplace, ben_soliman, scrapped, cat_brand_targets,
        cat_targets, discounted
    )
    print(f"    ‚úì {len(metrics)} product-warehouse combinations")

    # B. Scores and stock / RR / returns comments
    print("\n[B] Adding scoring and classification...")
    scored = add_scoring_classification(metrics, commercial)
    print(f"    ‚úì Scoring added")

    # C. Market position and price comments
    print("\n[C] Adding final scoring (market position, price comments)...")
    final_scored = add_final_scoring(scored)
    print(f"    ‚úì Final scoring added")

    # D. Recommended action and team flags
    print("\n[D] Determining recommended actions...")
    with_actions = add_actions(final_scored)
    print(f"    ‚úì Actions determined")

    # E. Output shaping and target join
    print("\n[E] Finalizing output...")
    final = finalize_output(with_actions, sku_targets)
    print(f"    ‚úì Final output ready with {len(final)} records")

    print("\n" + "=" * 60)
    print("ANALYSIS COMPLETE!")
    print("=" * 60)

    return final

# Run the full analysis. This executes every fetch step each time the cell
# runs; the resulting `df_result` is the frame the later cells read.
df_result = run_pricing_status_analysis()
# Trailing expression: preview the first rows as the cell output.
df_result.head()


In [None]:
# =============================================================================
# PART 20: UTILITY FUNCTIONS - EXPORT & SUMMARY
# =============================================================================

def export_results(df, filename='pricing_status_output.xlsx'):
    """
    Export results to Excel file.

    Args:
        df: DataFrame to write; the index is not written.
        filename: Target path, passed straight to ``DataFrame.to_excel``.

    Returns:
        The ``filename`` that was written, unchanged.
    """
    df.to_excel(filename, index=False)
    print(f"Results exported to {filename}")
    return filename

def get_summary_stats(df):
    """
    Build a summary dictionary for the analysis results.

    Always contains the row count and the distinct product / warehouse counts
    (0 when the id column is absent). For each optional column that exists,
    adds a value-count breakdown (stock status, actions, market position) or
    the number of non-null team flags. Keys are inserted in display order.
    """
    available = df.columns

    def distinct_count(column):
        return df[column].nunique() if column in available else 0

    def add_breakdown(label, column):
        if column in available:
            summary[label] = df[column].value_counts().to_dict()

    summary = {
        'Total SKU-Warehouse combinations': len(df),
        'Unique Products': distinct_count('product_id'),
        'Unique Warehouses': distinct_count('warehouse_id'),
    }

    # Categorical breakdowns
    add_breakdown('Stock Status', 'stock_comment')
    add_breakdown('Actions', 'action')

    # Team workload: rows whose team flag is set
    team_labels = (
        ('Pricing Team Items', 'pricing_team'),
        ('Purchase Team Items', 'purchase_team'),
        ('Commercial Team Items', 'commercial_team'),
    )
    for label, column in team_labels:
        if column in available:
            summary[label] = df[column].notna().sum()

    add_breakdown('Market Position', 'market_position_status')

    return summary

def print_summary(summary):
    """
    Print the summary dictionary as a framed, human-readable report.

    Scalar entries are printed as ``key: value``. Dictionary entries get their
    own heading followed by one indented line per item.
    """
    rule = "=" * 60

    print("\n" + rule)
    print("SUMMARY STATISTICS")
    print(rule)

    for label, content in summary.items():
        if not isinstance(content, dict):
            print(f"{label}: {content}")
            continue

        # Nested breakdown: heading, then indented items
        print(f"\n{label}:")
        for name, count in content.items():
            print(f"    {name}: {count}")

    print("\n" + rule)

# Usage: summarise the `df_result` produced by the analysis cell, print the
# report, and write the full result to Excel.
summary = get_summary_stats(df_result)
print_summary(summary)
export_results(df_result, 'pricing_status_output.xlsx')


# Pricing Status Analysis - Quick Reference

## Data Flow Overview:

1. **Static Data**: Warehouse mappings (region, cohort_id)
2. **COGS**: Current cost of goods (wac_p)
3. **Running Rates**: Predicted running rates from past 14 days
4. **Stocks**: Available stock with DOH calculations
5. **Sales**: 150-day sales history with percentile metrics
6. **Prices**: Latest cohort pricing
7. **Market Prices**: Min/Mod/Max from marketplace, Ben Soliman, and scraped data
8. **Targets**: Category/brand margin targets
9. **Discounts**: Bundle, SKU discount, quantity discount percentages
10. **Commercial Constraints**: Minimum price restrictions

## Key Metrics:

| Metric | Description |
|--------|-------------|
| `doh` | Days on Hand (stocks / running_rate) |
| `bm` | Basic Margin ((price - cost) / price) |
| `high_rr` | 80th percentile of historical running rate |
| `combined_price_score` | Position within the combined market price range (0 = market min, 1 = market max; below 0 or above 1 means the price is outside the range) |

## Action Matrix:

| Stock Status | Price Status | RR Status | Recommended Action |
|--------------|--------------|-----------|-------------------|
| OOS | - | - | Purchase |
| Good stocks | Low/Below target | Low RR | Offers & Credit Note (when offers_perc < 0.1), otherwise Credit Note |
| Good stocks | High | Low RR | Reduce price / Remove commercial min |
| Low stock | - | Very High RR | Increase price |
| Over Stocked | High | Low RR, cu_rr=0 and today_rr=0 | Check activation |

## Configuration:

To customize the analysis, modify:
- `get_warehouse_mapping()` - Add/remove warehouses
- `fetch_prices()` - Modify cohort_ids
- `determine_action()` - Adjust action logic thresholds


In [None]:
# =============================================================================
# RUN ANALYSIS, PRINT SUMMARY, PREVIEW
# =============================================================================

# Run the full analysis.
# NOTE(review): this code is live, not commented out. It calls
# run_pricing_status_analysis() again, so a Run All executes the whole
# pipeline a second time and rebinds df_result - confirm that is intended.
df_result = run_pricing_status_analysis()

# View summary:
summary = get_summary_stats(df_result)
print_summary(summary)

# Preview the data (first 20 rows shown as the cell output):
df_result.head(20)


In [None]:
# =============================================================================
# EXPORT RESULTS TO EXCEL (Optional)
# =============================================================================

# Export the full result. The file name is the same one used by the export
# call in the summary "Usage" cell, so this call targets that same path.
export_results(df_result, 'pricing_status_output.xlsx')


In [None]:
# =============================================================================
# AGGREGATE ANALYSIS VIEW
# =============================================================================

def create_aggregate_analysis(df):
    """
    Print and return an aggregate view of the pricing status results.

    Sections produced:
    - Total target NMV (high_rr * price) versus current NMV (current_rr * price)
    - Top 15 dropping brands, ranked by NMV gap
    - Market position breakdown for those brands (SKU counts)
    - Required actions for those brands (SKU counts) and an action summary

    Every NMV figure, per brand and overall, is a sum of row-level
    rate * price values, so the brand rows add up to the printed totals.

    Args:
        df: Finalised result frame. Must contain brand, product_id,
            warehouse_id, price, high_rr, current_rr,
            market_position_status and action.

    Returns:
        dict with keys:
        - 'total_metrics': dict of target_nmv, current_nmv, nmv_gap, nmv_gap_pct
        - 'brand_analysis': one row per brand, sorted by nmv_drop descending
        - 'market_status_pivot': brand x market position SKU counts
        - 'action_pivot': brand x action SKU counts
        - 'action_summary': one row per action for the top brands; rows with
          no recommended action are kept as a NaN-action row
    """
    df_analysis = df.copy()

    # Row-level NMV (rate * price); all totals below are sums of these columns
    df_analysis['target_nmv'] = df_analysis['high_rr'] * df_analysis['price']
    df_analysis['current_nmv'] = df_analysis['current_rr'] * df_analysis['price']

    # ==========================================================================
    # 1. TOTAL TARGET NMV SUMMARY
    # ==========================================================================
    total_target_nmv = df_analysis['target_nmv'].sum()
    total_current_nmv = df_analysis['current_nmv'].sum()
    nmv_gap = total_target_nmv - total_current_nmv
    nmv_gap_pct = nmv_gap / total_target_nmv * 100 if total_target_nmv > 0 else 0

    print("=" * 80)
    print("üìä AGGREGATE ANALYSIS - PRICING STATUS")
    print("=" * 80)

    print("\n" + "‚îÄ" * 80)
    print("üí∞ TOTAL NMV SUMMARY")
    print("‚îÄ" * 80)
    print(f"  Target NMV (High RR √ó Price):    {total_target_nmv:>15,.0f} EGP")
    print(f"  Current NMV (Current RR √ó Price): {total_current_nmv:>15,.0f} EGP")
    print(f"  NMV Gap:                          {nmv_gap:>15,.0f} EGP ({nmv_gap_pct:.1f}%)")

    # ==========================================================================
    # 2. TOP DROPPING BRANDS ANALYSIS
    # ==========================================================================
    # current_nmv is summed per row rather than derived as
    # sum(current_rr) * mean(price): the latter mixes SKUs with different
    # prices and does not reconcile with target_nmv or the overall total.
    brand_agg = df_analysis.groupby('brand').agg(
        target_nmv=('target_nmv', 'sum'),
        total_high_rr=('high_rr', 'sum'),
        total_current_rr=('current_rr', 'sum'),
        avg_price=('price', 'mean'),
        num_products=('product_id', 'nunique'),
        num_warehouses=('warehouse_id', 'nunique'),
        current_nmv=('current_nmv', 'sum'),
    ).reset_index()

    # Drop metrics; a brand with zero high_rr gets NaN instead of a division error
    brand_agg['nmv_drop'] = brand_agg['target_nmv'] - brand_agg['current_nmv']
    brand_agg['rr_drop_pct'] = ((brand_agg['total_high_rr'] - brand_agg['total_current_rr']) /
                                 brand_agg['total_high_rr'].replace(0, np.nan) * 100)

    # Sort by NMV drop (biggest drops first)
    brand_agg_sorted = brand_agg.sort_values('nmv_drop', ascending=False)

    print("\n" + "‚îÄ" * 80)
    print("üìâ TOP 15 DROPPING BRANDS (by NMV Gap)")
    print("‚îÄ" * 80)

    top_dropping = brand_agg_sorted.head(15)
    print(f"{'Brand':<30} {'Target NMV':>15} {'Current NMV':>15} {'NMV Drop':>15} {'RR Drop%':>10}")
    print("‚îÄ" * 85)
    for _, row in top_dropping.iterrows():
        print(f"{str(row['brand'])[:30]:<30} {row['target_nmv']:>15,.0f} {row['current_nmv']:>15,.0f} "
              f"{row['nmv_drop']:>15,.0f} {row['rr_drop_pct']:>9.1f}%")

    # ==========================================================================
    # 3. MARKET STATUS BY DROPPING BRANDS
    # ==========================================================================
    # Get top 15 dropping brand names
    top_dropping_brands = top_dropping['brand'].tolist()

    # Filter data to only include top dropping brands
    df_top_brands = df_analysis[df_analysis['brand'].isin(top_dropping_brands)]

    # Market status breakdown for top dropping brands
    market_status_by_brand = df_top_brands.groupby(['brand', 'market_position_status']).agg({
        'target_nmv': 'sum',
        'product_id': 'nunique'
    }).reset_index()

    market_status_by_brand.columns = ['brand', 'market_position', 'target_nmv', 'num_skus']

    # Pivot for better view
    market_pivot = market_status_by_brand.pivot_table(
        index='brand',
        columns='market_position',
        values='num_skus',
        fill_value=0
    ).reset_index()

    print("\n" + "‚îÄ" * 80)
    print("üè™ MARKET POSITION STATUS (Top Dropping Brands - SKU Count)")
    print("‚îÄ" * 80)
    print(market_pivot.to_string(index=False))

    # ==========================================================================
    # 4. REQUIRED ACTIONS BY BRAND
    # ==========================================================================
    actions_by_brand = df_top_brands.groupby(['brand', 'action']).agg({
        'target_nmv': 'sum',
        'product_id': 'nunique'
    }).reset_index()

    actions_by_brand.columns = ['brand', 'action', 'target_nmv', 'num_skus']

    # Pivot actions
    action_pivot = actions_by_brand.pivot_table(
        index='brand',
        columns='action',
        values='num_skus',
        fill_value=0
    ).reset_index()

    print("\n" + "‚îÄ" * 80)
    print("‚ö° REQUIRED ACTIONS (Top Dropping Brands - SKU Count)")
    print("‚îÄ" * 80)
    print(action_pivot.to_string(index=False))

    # ==========================================================================
    # 5. ACTION SUMMARY FOR TOP DROPPING BRANDS
    # ==========================================================================
    # dropna=False keeps rows without a recommended action; groupby would
    # otherwise drop them and the 'No Action' label below could never print.
    action_summary = df_top_brands.groupby('action', dropna=False).agg({
        'target_nmv': 'sum',
        'product_id': 'nunique',
        'brand': 'nunique'
    }).reset_index()

    action_summary.columns = ['action', 'target_nmv_at_risk', 'num_skus', 'num_brands']
    action_summary = action_summary.sort_values('target_nmv_at_risk', ascending=False)

    print("\n" + "‚îÄ" * 80)
    print("üìã ACTION PRIORITY SUMMARY (Top Dropping Brands)")
    print("‚îÄ" * 80)
    print(f"{'Action':<35} {'Target NMV at Risk':>18} {'# SKUs':>10} {'# Brands':>10}")
    print("‚îÄ" * 73)
    for _, row in action_summary.iterrows():
        action_name = str(row['action']) if pd.notna(row['action']) else 'No Action'
        print(f"{action_name[:35]:<35} {row['target_nmv_at_risk']:>18,.0f} {row['num_skus']:>10} {row['num_brands']:>10}")

    print("\n" + "=" * 80)

    # Return dataframes for further analysis
    return {
        'total_metrics': {
            'target_nmv': total_target_nmv,
            'current_nmv': total_current_nmv,
            'nmv_gap': nmv_gap,
            'nmv_gap_pct': nmv_gap_pct
        },
        'brand_analysis': brand_agg_sorted,
        'market_status_pivot': market_pivot,
        'action_pivot': action_pivot,
        'action_summary': action_summary
    }

# Run the aggregate analysis on the finalised result; the report is printed
# and the returned dict of frames is kept for the cells below.
aggregate_results = create_aggregate_analysis(df_result)


In [None]:
# =============================================================================
# DETAILED BRAND DRILLDOWN VIEW
# =============================================================================

def get_brand_drilldown(df, brand_name):
    """
    Print and return the row-level detail for one brand.

    Selects the rows whose ``brand`` equals ``brand_name``, adds target_nmv,
    current_nmv and nmv_gap, prints a short summary followed by a per-SKU
    table sorted by NMV gap (largest first).

    Returns:
        The brand's rows with the three NMV columns added, or None (after
        printing a message) when the brand has no rows.
    """
    brand_rows = df.loc[df['brand'] == brand_name].copy()

    if brand_rows.empty:
        print(f"No data found for brand: {brand_name}")
        return None

    # NMV at the target rate, at the current rate, and the shortfall
    unit_price = brand_rows['price']
    brand_rows['target_nmv'] = brand_rows['high_rr'] * unit_price
    brand_rows['current_nmv'] = brand_rows['current_rr'] * unit_price
    brand_rows['nmv_gap'] = brand_rows['target_nmv'] - brand_rows['current_nmv']

    print(f"\n{'='*80}")
    print(f"üîç BRAND DRILLDOWN: {brand_name}")
    print(f"{'='*80}")

    # Headline numbers
    print(f"\nüìä Summary:")
    print(f"   Total SKUs: {brand_rows['product_id'].nunique()}")
    print(f"   Warehouses: {brand_rows['warehouse_id'].nunique()}")
    print(f"   Target NMV: {brand_rows['target_nmv'].sum():,.0f} EGP")
    print(f"   Current NMV: {brand_rows['current_nmv'].sum():,.0f} EGP")
    print(f"   NMV Gap: {brand_rows['nmv_gap'].sum():,.0f} EGP")

    # Per-SKU table: preferred column order, limited to columns present
    preferred_order = (
        'warehouse_name', 'sku', 'price', 'high_rr', 'current_rr',
        'stock_comment', 'market_position_status', 'price_comment',
        'rr_comment', 'action', 'nmv_gap',
    )
    shown = [name for name in preferred_order if name in brand_rows.columns]
    detail = brand_rows[shown].sort_values('nmv_gap', ascending=False)

    print(f"\nüìã SKU Details (sorted by NMV Gap):")
    print(detail.to_string(index=False))

    return brand_rows

# View the brand-level frame returned by create_aggregate_analysis; it is
# already sorted by nmv_drop descending, so head(15) is the top 15 droppers.
print("üìà TOP DROPPING BRANDS (Full DataFrame):")
aggregate_results['brand_analysis'].head(15)


In [None]:
# =============================================================================
# EXAMPLE: DRILLDOWN INTO TOP DROPPING BRAND
# =============================================================================

# Get the top dropping brand name: the first row of the brand frame, which
# is sorted by nmv_drop descending.
top_brand = aggregate_results['brand_analysis'].iloc[0]['brand']

# Drilldown into the top dropping brand (prints the report and returns the
# brand's rows, or None if the brand has no rows).
brand_detail = get_brand_drilldown(df_result, top_brand)

# Or specify a brand manually:
# brand_detail = get_brand_drilldown(df_result, "Your Brand Name Here")


In [None]:
# =============================================================================
# EXPORT AGGREGATE ANALYSIS TO EXCEL
# =============================================================================

def get_pricing_action_skus(df):
    """
    Filter SKUs that need pricing action:
    - Good stocks or Over Stocked
    - High RR or Low RR
    - Price needs adjustment based on target margin or market prices

    A row is kept when all of the following hold:
    1. stock_comment is 'Good stocks' or 'Over Stocked'
    2. rr_comment is 'High rr', 'Very High rr' or 'low rr'
    3. at least one of: the action is a pricing-related action (matched
       case-insensitively), the margin is off target (bm below 95% or above
       110% of target_margin), or the market position is below/above the
       market or the median.

    Args:
        df: Finalised result frame with price, high_rr, current_rr, bm,
            target_margin, stock_comment, rr_comment, action and
            market_position_status.

    Returns:
        New DataFrame of the matching rows with added priority, margin_gap,
        target_nmv, current_nmv and nmv_gap, sorted by priority (1 = most
        urgent) then by nmv_gap descending, limited to the export columns
        that exist.
    """
    df_filtered = df.copy()

    # Calculate additional metrics for context
    df_filtered['target_nmv'] = df_filtered['high_rr'] * df_filtered['price']
    df_filtered['current_nmv'] = df_filtered['current_rr'] * df_filtered['price']
    df_filtered['nmv_gap'] = df_filtered['target_nmv'] - df_filtered['current_nmv']
    df_filtered['margin_gap'] = df_filtered['bm'] - df_filtered['target_margin']

    # Filter 1: Stock status is Good stocks or Over Stocked
    stock_filter = df_filtered['stock_comment'].isin(['Good stocks', 'Over Stocked'])

    # Filter 2: RR status is High rr, Very High rr, or low rr
    rr_filter = df_filtered['rr_comment'].isin(['High rr', 'Very High rr', 'low rr'])

    # Filter 3: Price needs adjustment - actions related to pricing or offers.
    # Action labels are produced with mixed capitalisation ('increase price'
    # and 'Increase price' both occur), so compare in lower case to avoid
    # silently missing a spelling.
    pricing_actions = {
        'reduce price',
        'increase price',
        'increase price a bit',
        'offers & credit note',
        'remove commercial min',
        'credit note',
    }
    # astype(str) keeps this safe when the column is all-missing; the
    # resulting 'nan' / 'none' strings are not in the set.
    action_filter = df_filtered['action'].astype(str).str.lower().isin(pricing_actions)

    # Alternative: Also include SKUs where margin is off target or price is off market
    margin_off_target = (df_filtered['bm'] < df_filtered['target_margin'] * 0.95) | \
                        (df_filtered['bm'] > df_filtered['target_margin'] * 1.1)

    price_off_market = df_filtered['market_position_status'].isin([
        'Below Market', 'Above Market', 'Below Median', 'Above Median'
    ])

    # Combine filters: (stock OK) AND (RR is high/low) AND (action needed OR margin/market off)
    combined_filter = stock_filter & rr_filter & (action_filter | margin_off_target | price_off_market)

    df_pricing_action = df_filtered[combined_filter].copy()

    # Add recommendation priority; np.select takes the first matching rule
    low_rr = df_pricing_action['rr_comment'] == 'low rr'
    high_rr = df_pricing_action['rr_comment'].isin(['High rr', 'Very High rr'])
    below_target = df_pricing_action['bm'] < df_pricing_action['target_margin']
    df_pricing_action['priority'] = np.select(
        [
            low_rr & (df_pricing_action['stock_comment'] == 'Over Stocked'),  # need to move stock
            low_rr & (df_pricing_action['stock_comment'] == 'Good stocks'),
            high_rr & below_target,  # good opportunity to increase price
        ],
        [1, 2, 3],
        default=4,
    )

    # Sort by priority and NMV gap
    df_pricing_action = df_pricing_action.sort_values(
        ['priority', 'nmv_gap'],
        ascending=[True, False]
    )

    # Select relevant columns for the pricing action sheet
    columns_to_export = [
        'priority', 'region', 'warehouse_name', 'product_id', 'sku', 'cat', 'brand',
        'stocks', 'doh', 'stock_comment',
        'price', 'bm', 'target_margin', 'margin_gap', 'price_comment',
        'combined_min_market', 'combined_median_market', 'combined_max_market',
        'market_position_status',
        'high_rr', 'current_rr', 'rr_comment',
        'action', 'offers_perc', 'commercial_min',
        'target_nmv', 'current_nmv', 'nmv_gap'
    ]

    existing_cols = [c for c in columns_to_export if c in df_pricing_action.columns]

    return df_pricing_action[existing_cols]


def export_aggregate_analysis(df, aggregate_results, filename='pricing_aggregate_analysis.xlsx'):
    """
    Write the aggregate pricing analysis to a multi-sheet Excel workbook.

    Sheets are written in this order:
    - Summary: single-row frame built from aggregate_results['total_metrics']
    - Pricing Action: output of get_pricing_action_skus(df)
    - Brand Analysis: aggregate_results['brand_analysis']
    - Market Status: aggregate_results['market_status_pivot']
    - Actions by Brand: aggregate_results['action_pivot']
    - Action Summary: aggregate_results['action_summary']
    - Raw Data: copy of df with target_nmv / current_nmv / nmv_gap columns

    Args:
        df: DataFrame with at least 'high_rr', 'current_rr' and 'price' columns.
            The caller's frame is copied before the NMV columns are added.
        aggregate_results: Mapping holding the keys listed above; every value
            except 'total_metrics' must support .to_excel().
        filename: Path of the Excel file to create.

    Returns:
        The filename that was written.
    """
    # Raw-data sheet: the input plus NMV columns derived from run rate * price.
    raw_with_nmv = df.copy()
    raw_with_nmv['target_nmv'] = raw_with_nmv['high_rr'] * raw_with_nmv['price']
    raw_with_nmv['current_nmv'] = raw_with_nmv['current_rr'] * raw_with_nmv['price']
    raw_with_nmv['nmv_gap'] = raw_with_nmv['target_nmv'] - raw_with_nmv['current_nmv']

    # SKUs flagged for a price adjustment (computed from the untouched input).
    action_skus = get_pricing_action_skus(df)

    # (key in aggregate_results, sheet name) for sheets exported as-is.
    passthrough_sheets = (
        ('brand_analysis', 'Brand Analysis'),
        ('market_status_pivot', 'Market Status'),
        ('action_pivot', 'Actions by Brand'),
        ('action_summary', 'Action Summary'),
    )

    with pd.ExcelWriter(filename, engine='openpyxl') as writer:
        # Overall metrics become a one-row table.
        pd.DataFrame([aggregate_results['total_metrics']]).to_excel(
            writer, sheet_name='Summary', index=False
        )

        action_skus.to_excel(writer, sheet_name='Pricing Action', index=False)

        # Keys are looked up lazily, one sheet at a time, in workbook order.
        for result_key, sheet_title in passthrough_sheets:
            aggregate_results[result_key].to_excel(
                writer, sheet_name=sheet_title, index=False
            )

        raw_with_nmv.to_excel(writer, sheet_name='Raw Data', index=False)

    print(f"‚úì Aggregate analysis exported to: {filename}")
    print(f"  - Pricing Action sheet: {len(action_skus)} SKUs needing price adjustment")
    return filename

# Export: write the multi-sheet Excel report to 'pricing_aggregate_analysis.xlsx'
# (a relative path, so it resolves against the current working directory).
# NOTE(review): df_result and aggregate_results are not defined in this part of
# the file; they are expected to exist from earlier in the notebook.
export_aggregate_analysis(df_result, aggregate_results, 'pricing_aggregate_analysis.xlsx')
