# Stock Health Dashboard

## Purpose
Comprehensive analysis of stock health across all SKUs and warehouses:
- **Bucket Classification**: OOS, Zero Demand, Below Target, On Track, Above Target, plus a separate Overstocked flag (DOH > 30)
- **Actions Tracking**: Price changes, cart rule changes, discounts from all pricing modules
- **Trend Analysis**: Is performance improving compared to yesterday?

## Output
1. Interactive Jupyter analysis with visualizations
2. Excel file with two sheets:
   - SKU_Details: Full data per SKU/warehouse
   - Bucket_Summary: Aggregated metrics by bucket


In [None]:
# =============================================================================
# IMPORTS AND SETUP
# =============================================================================
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import pytz
import os
import sys
import warnings
# Suppress every Python warning for the rest of the session.
warnings.filterwarnings('ignore')

# Make the local `modules` folder and the parent directory importable.
sys.path.extend(['modules', '..'])

# Project helper module; presumably prepares the SNOWFLAKE_* environment
# variables read later in this notebook — TODO confirm.
import setup_environment_2
setup_environment_2.initialize_env()

# Reference clock: every "today"/"yesterday" below is a Cairo calendar day.
CAIRO_TZ = pytz.timezone('Africa/Cairo')
CAIRO_NOW = datetime.now(CAIRO_TZ)
TODAY = CAIRO_NOW.date()
YESTERDAY = TODAY - timedelta(days=1)

# Run banner.
print("Stock Health Dashboard")
print("=" * 60)
print(f"Current Time: {CAIRO_NOW:%Y-%m-%d %H:%M:%S} Cairo")
print(f"Today: {TODAY}")
print(f"Yesterday: {YESTERDAY}")


In [None]:
# =============================================================================
# SNOWFLAKE CONNECTION
# =============================================================================
import snowflake.connector

def query_snowflake(query):
    """Run `query` on Snowflake and return the full result set as a DataFrame.

    A new connection is opened for every call using the SNOWFLAKE_USERNAME,
    SNOWFLAKE_ACCOUNT, SNOWFLAKE_PASSWORD and SNOWFLAKE_DATABASE environment
    variables, and is closed again whether or not the query succeeds.
    Result column names are lower-cased.

    Raises:
        KeyError: if one of the required environment variables is not set.
    """
    env_by_argument = {
        "user": "SNOWFLAKE_USERNAME",
        "account": "SNOWFLAKE_ACCOUNT",
        "password": "SNOWFLAKE_PASSWORD",
        "database": "SNOWFLAKE_DATABASE",
    }
    credentials = {arg: os.environ[env] for arg, env in env_by_argument.items()}
    connection = snowflake.connector.connect(**credentials)
    try:
        cursor = connection.cursor()
        # Select the compute warehouse before running the caller's statement.
        cursor.execute("USE WAREHOUSE COMPUTE_WH")
        cursor.execute(query)
        rows = cursor.fetchall()
        column_names = [column[0].lower() for column in cursor.description]
        frame = pd.DataFrame(rows, columns=column_names)
    finally:
        connection.close()
    return frame

def get_snowflake_timezone():
    """Return the TIMEZONE parameter reported by Snowflake, or "UTC" if none is returned."""
    params = query_snowflake("SHOW PARAMETERS LIKE 'TIMEZONE'")
    if len(params) > 0:
        return params.value[0]
    return "UTC"


# Source timezone interpolated into every CONVERT_TIMEZONE(...) call below.
TIMEZONE = get_snowflake_timezone()
print(f"Snowflake Timezone: {TIMEZONE}")


In [None]:
# =============================================================================
# CONFIGURATION
# =============================================================================

# Warehouse mapping: one (region, warehouse name, warehouse_id, cohort_id)
# tuple per warehouse. Several warehouses can share one pricing cohort.
WAREHOUSE_MAPPING = [
    ('Cairo', 'Mostorod', 1, 700),
    ('Giza', 'Barageel', 236, 701),
    ('Giza', 'Sakkarah', 962, 701),
    ('Delta West', 'El-Mahala', 337, 703),
    ('Delta West', 'Tanta', 8, 703),
    ('Delta East', 'Mansoura FC', 339, 704),
    ('Delta East', 'Sharqya', 170, 704),
    ('Upper Egypt', 'Assiut FC', 501, 1124),
    ('Upper Egypt', 'Bani sweif', 401, 1126),
    ('Upper Egypt', 'Menya Samalot', 703, 1123),
    ('Upper Egypt', 'Sohag', 632, 1125),
    ('Alexandria', 'Khorshed Alex', 797, 702),
]

# Distinct cohort ids used by the mapping above, in ascending order.
COHORT_IDS = sorted({cohort_id for *_, cohort_id in WAREHOUSE_MAPPING})

# Bucket thresholds
STD_THRESHOLD = 3  # ±3 standard deviations
OVERSTOCKED_DOH_THRESHOLD = 30  # DOH > 30 = overstocked

# Output workbook name, stamped with the Cairo calendar date of this run.
OUTPUT_FILE = "stock_health_output_" + TODAY.strftime("%Y%m%d") + ".xlsx"

print("Output File: " + OUTPUT_FILE)
print(f"Overstocked DOH Threshold: > {OVERSTOCKED_DOH_THRESHOLD} days")


---
## Section 1: Base Data Loading
Load stocks, prices, WAC, P80/P70 benchmarks


In [None]:
# =============================================================================
# QUERY 1: CURRENT STOCKS
# =============================================================================
# Fetches `available_stock` (cast to INTEGER, exposed as `stocks`) for every
# (warehouse_id, product_id) row of `product_warehouse` flagged as the basic
# unit, skipping warehouse ids 6, 9 and 10.
# NOTE(review): the reason those three warehouses are excluded is not visible
# in this notebook — confirm with the data owner.
print("Loading current stocks...")

STOCK_QUERY = '''
SELECT 
    pw.warehouse_id,
    pw.product_id,
    pw.available_stock::INTEGER AS stocks
FROM product_warehouse pw
WHERE pw.warehouse_id NOT IN (6, 9, 10)
    AND pw.is_basic_unit = 1
'''

# Result columns: warehouse_id, product_id, stocks.
df_stocks = query_snowflake(STOCK_QUERY)
print(f"  Loaded {len(df_stocks)} stock records")


In [None]:
# =============================================================================
# QUERY 2: PRODUCT BASE DATA (SKU info, WAC)
# =============================================================================
# Builds one row per (cohort, product) with descriptive attributes, cost and
# the current shelf price:
#   * `local_prices` averages the customized cohort prices per product and
#     packing unit and labels each cohort with a region.
#   * `skus_prices` keeps only packing units with basic_unit_count = 1
#     (for product 1309 only packing unit 2).
#   * The outer query joins product, category, brand and unit names and the
#     COGS row whose [from_date, to_date] window contains the current Cairo
#     time, and keeps only rows with positive wac1 and wac_p.
# The cohort list also contains ids 695-699, which have no entry in
# WAREHOUSE_MAPPING.
# NOTE(review): the reason rows created on 2023-07-31 are excluded is not
# visible here — confirm.
print("Loading product base data...")

PRODUCT_BASE_QUERY = f'''
WITH skus_prices AS (
    WITH local_prices AS (
        SELECT  
            CASE 
                WHEN cpu.cohort_id IN (700, 695) THEN 'Cairo'
                WHEN cpu.cohort_id IN (701) THEN 'Giza'
                WHEN cpu.cohort_id IN (704, 698) THEN 'Delta East'
                WHEN cpu.cohort_id IN (703, 697) THEN 'Delta West'
                WHEN cpu.cohort_id IN (696, 1123, 1124, 1125, 1126) THEN 'Upper Egypt'
                WHEN cpu.cohort_id IN (702, 699) THEN 'Alexandria'
            END AS region,
            cohort_id,
            pu.product_id,
            pu.packing_unit_id,
            pu.basic_unit_count,
            AVG(cpu.price) AS price
        FROM cohort_product_packing_units cpu
        JOIN PACKING_UNIT_PRODUCTS pu ON pu.id = cpu.product_packing_unit_id
        WHERE cpu.cohort_id IN (700,701,702,703,704,695,696,697,698,699,1123,1124,1125,1126)
            AND cpu.created_at::date <> '2023-07-31'
            AND cpu.is_customized = TRUE
        GROUP BY ALL
    )
    
    SELECT region, cohort_id, product_id, price
    FROM local_prices
    WHERE basic_unit_count = 1
        AND ((product_id = 1309 AND packing_unit_id = 2) OR (product_id <> 1309))
)

SELECT DISTINCT
    sp.region, sp.cohort_id, p.id as product_id,
    CONCAT(p.name_ar, ' ', p.size, ' ', pu.name_ar) AS sku,
    b.name_ar AS brand,
    cat.name_ar AS cat,
    c.wac1, c.wac_p, sp.price as current_price
FROM skus_prices sp
JOIN products p ON p.id = sp.product_id
JOIN finance.all_cogs c ON c.product_id = sp.product_id 
    AND CONVERT_TIMEZONE('{TIMEZONE}', 'Africa/Cairo', CURRENT_TIMESTAMP()) BETWEEN c.from_date AND c.to_date
JOIN categories cat ON cat.id = p.category_id
JOIN brands b ON b.id = p.brand_id
JOIN product_units pu ON pu.id = p.unit_id
WHERE c.wac1 > 0 AND c.wac_p > 0
'''

# Result columns: region, cohort_id, product_id, sku, brand, cat, wac1, wac_p,
# current_price.
df_product_base = query_snowflake(PRODUCT_BASE_QUERY)
print(f"  Loaded {len(df_product_base)} product base records")


In [None]:
# =============================================================================
# QUERY 3: P80/P70 BENCHMARKS (from Pricing_data_extraction table)
# =============================================================================
print("Loading P80/P70 benchmarks...")


def _build_benchmark_query(days_back=0):
    """Return the benchmark SQL for the snapshot dated `days_back` days before today.

    "Today" is the current Cairo calendar date, derived inside Snowflake from
    CURRENT_TIMESTAMP() and the TIMEZONE value detected earlier.

    Args:
        days_back: whole days to step back from today; 0 selects today's snapshot.

    Returns:
        The SQL text selecting the benchmark columns for that snapshot date.
    """
    snapshot_date = f"CONVERT_TIMEZONE('{TIMEZONE}', 'Africa/Cairo', CURRENT_TIMESTAMP())::DATE"
    if days_back:
        snapshot_date = f"{snapshot_date} - {int(days_back)}"
    # NOTE(review): `created_at` is compared to a DATE without a cast here,
    # whereas the module-action queries cast `created_at::DATE` — confirm this
    # column is a DATE and not a timestamp.
    return f'''
SELECT DISTINCT
    warehouse_id, product_id,
    p80_daily_240d,
    avg_daily_240d,
    std_daily_240d,
    p70_daily_retailers_240d,
    std_daily_retailers_240d,
    normal_refill,
    refill_stddev
FROM MATERIALIZED_VIEWS.Pricing_data_extraction
WHERE created_at = {snapshot_date}
'''


# Both query texts come from the same builder so they cannot drift apart.
BENCHMARK_QUERY = _build_benchmark_query(0)
BENCHMARK_QUERY_YESTERDAY = _build_benchmark_query(1)

df_benchmarks = query_snowflake(BENCHMARK_QUERY)
print(f"  Loaded {len(df_benchmarks)} benchmark records")

# If no data for today, try yesterday
if df_benchmarks.empty:
    print("  No data for today, trying yesterday...")
    df_benchmarks = query_snowflake(BENCHMARK_QUERY_YESTERDAY)
    print(f"  Loaded {len(df_benchmarks)} benchmark records from yesterday")

# Make the failure visible: with no snapshot at all, every SKU silently falls
# back to the default benchmark values applied when the data is merged.
if df_benchmarks.empty:
    print("  Warning: no benchmark snapshot found for today or yesterday; "
          "default benchmark values will be used for every SKU")


---
## Section 2: Yesterday's Running Rate & Today's UTH


In [None]:
# =============================================================================
# QUERY 4: YESTERDAY'S SALES (Running Rate)
# =============================================================================
# Aggregates yesterday's (Cairo date) order lines per (warehouse_id,
# product_id): units, total price and distinct retailers. Only the
# 'telesales' and 'retailer' channels are counted; order status ids 7 and 12
# and zero-quantity lines are excluded.
# NOTE(review): `so.created_at::date` is compared to a Cairo date without any
# timezone conversion — confirm that `created_at` is stored in Cairo time.
print("Loading yesterday's sales data...")

YESTERDAY_SALES_QUERY = f'''
SELECT
    pso.warehouse_id,
    pso.product_id,
    SUM(pso.purchased_item_count) AS yesterday_qty,
    SUM(pso.total_price) AS yesterday_nmv,
    COUNT(DISTINCT so.retailer_id) AS yesterday_retailers
FROM product_sales_order pso
JOIN sales_orders so ON so.id = pso.sales_order_id
WHERE so.created_at::date = CONVERT_TIMEZONE('{TIMEZONE}', 'Africa/Cairo', CURRENT_TIMESTAMP())::DATE - 1
    AND so.sales_order_status_id NOT IN (7, 12)
    AND so.channel IN ('telesales', 'retailer')
    AND pso.purchased_item_count <> 0
GROUP BY pso.warehouse_id, pso.product_id
'''

# Result columns: warehouse_id, product_id, yesterday_qty, yesterday_nmv,
# yesterday_retailers.
df_yesterday_sales = query_snowflake(YESTERDAY_SALES_QUERY)
print(f"  Loaded {len(df_yesterday_sales)} yesterday sales records")
print(f"  Total Yesterday Qty: {df_yesterday_sales['yesterday_qty'].sum():,.0f}")


In [None]:
# =============================================================================
# QUERY 5: TODAY'S UTH (Up-Till-Hour) SALES
# =============================================================================
# Aggregates today's (Cairo date) order lines per (warehouse_id, product_id),
# counting only orders created in hours strictly before the current Cairo
# hour, so the partially elapsed hour is left out. The status, channel and
# quantity filters match the yesterday-sales query.
# NOTE(review): `HOUR(so.created_at)` is compared to a Cairo hour without any
# timezone conversion — confirm that `created_at` is stored in Cairo time.
print("Loading today's UTH sales...")

TODAY_UTH_QUERY = f'''
WITH params AS (
    SELECT
        CONVERT_TIMEZONE('{TIMEZONE}', 'Africa/Cairo', CURRENT_TIMESTAMP())::DATE AS today,
        HOUR(CONVERT_TIMEZONE('{TIMEZONE}', 'Africa/Cairo', CURRENT_TIMESTAMP())) AS current_hour
)
SELECT
    pso.warehouse_id,
    pso.product_id,
    SUM(pso.purchased_item_count) AS today_uth_qty,
    SUM(pso.total_price) AS today_uth_nmv,
    COUNT(DISTINCT so.retailer_id) AS today_uth_retailers
FROM product_sales_order pso
JOIN sales_orders so ON so.id = pso.sales_order_id
CROSS JOIN params p
WHERE so.created_at::date = p.today
    AND HOUR(so.created_at) < p.current_hour
    AND so.sales_order_status_id NOT IN (7, 12)
    AND so.channel IN ('telesales', 'retailer')
    AND pso.purchased_item_count <> 0
GROUP BY pso.warehouse_id, pso.product_id
'''

# Result columns: warehouse_id, product_id, today_uth_qty, today_uth_nmv,
# today_uth_retailers.
df_today_uth = query_snowflake(TODAY_UTH_QUERY)
print(f"  Loaded {len(df_today_uth)} today UTH records")
print(f"  Total Today UTH Qty: {df_today_uth['today_uth_qty'].sum():,.0f}")


In [None]:
# =============================================================================
# QUERY 6: HISTORICAL HOURLY DISTRIBUTION (for closing expectation)
# =============================================================================
# Estimates, per (warehouse_id, category), the share of a day's units that is
# normally sold before the current Cairo hour:
#   * `hourly_sales`  - units per warehouse/category/day/hour over the 120
#     days ending yesterday.
#   * `daily_totals`  - full-day units per warehouse/category/day.
#   * `uth_totals`    - units sold in hours before the current hour.
#   * final SELECT    - average over days of uth_total / day_total; days with
#     no sales before the current hour contribute 0.
# The result (`avg_uth_pct`, a fraction) is later used to project today's
# closing quantity from the up-till-hour quantity.
print("Loading hourly distribution...")

HOURLY_DIST_QUERY = f'''
WITH params AS (
    SELECT
        CONVERT_TIMEZONE('{TIMEZONE}', 'Africa/Cairo', CURRENT_TIMESTAMP())::DATE AS today,
        CONVERT_TIMEZONE('{TIMEZONE}', 'Africa/Cairo', CURRENT_TIMESTAMP())::DATE - 120 AS history_start,
        HOUR(CONVERT_TIMEZONE('{TIMEZONE}', 'Africa/Cairo', CURRENT_TIMESTAMP())) AS current_hour
),
hourly_sales AS (
    SELECT
        pso.warehouse_id,
        c.name_ar AS cat,
        so.created_at::date AS sale_date,
        HOUR(so.created_at) AS sale_hour,
        SUM(pso.purchased_item_count) AS qty
    FROM product_sales_order pso
    JOIN sales_orders so ON so.id = pso.sales_order_id
    JOIN products p ON p.id = pso.product_id
    JOIN categories c ON c.id = p.category_id
    CROSS JOIN params
    WHERE so.created_at::date BETWEEN params.history_start AND params.today - 1
        AND so.sales_order_status_id NOT IN (7, 12)
        AND so.channel IN ('telesales', 'retailer')
        AND pso.purchased_item_count <> 0
    GROUP BY pso.warehouse_id, c.name_ar, so.created_at::date, sale_hour
),
daily_totals AS (
    SELECT warehouse_id, cat, sale_date,
        SUM(qty) AS day_total_qty
    FROM hourly_sales
    GROUP BY warehouse_id, cat, sale_date
),
uth_totals AS (
    SELECT hs.warehouse_id, hs.cat, hs.sale_date,
        SUM(hs.qty) AS uth_total_qty
    FROM hourly_sales hs
    CROSS JOIN params p
    WHERE hs.sale_hour < p.current_hour
    GROUP BY hs.warehouse_id, hs.cat, hs.sale_date
)
SELECT
    dt.warehouse_id, dt.cat,
    AVG(COALESCE(ut.uth_total_qty, 0) / NULLIF(dt.day_total_qty, 0)) AS avg_uth_pct
FROM daily_totals dt
LEFT JOIN uth_totals ut ON dt.warehouse_id = ut.warehouse_id 
    AND dt.cat = ut.cat AND dt.sale_date = ut.sale_date
WHERE dt.day_total_qty > 0
GROUP BY dt.warehouse_id, dt.cat
'''

# Result columns: warehouse_id, cat, avg_uth_pct.
df_hourly_dist = query_snowflake(HOURLY_DIST_QUERY)
print(f"  Loaded {len(df_hourly_dist)} hourly distribution records")
print(f"  Average UTH %: {df_hourly_dist['avg_uth_pct'].mean()*100:.1f}%")


---
## Section 3: Active Discounts (SKU Discounts + Quantity Discounts)


In [None]:
# =============================================================================
# QUERY 7: ACTIVE SKU DISCOUNTS
# =============================================================================
# Lists SKU discounts that are active, not soft-deleted, and whose
# [start_at, end_at] window contains the current Cairo time. Rows are keyed by
# product only (no warehouse), and a product can appear more than once if
# several discounts overlap.
print("Loading active SKU discounts...")

SKU_DISCOUNT_QUERY = f'''
SELECT DISTINCT
    sdp.product_id,
    sd.discount_percentage AS sku_discount_pct,
    sd.start_at AS sku_disc_start,
    sd.end_at AS sku_disc_end
FROM sku_discounts sd
JOIN sku_discount_products sdp ON sdp.sku_discount_id = sd.id
WHERE sd.active = TRUE
    AND sd.deleted_at IS NULL
    AND CONVERT_TIMEZONE('{TIMEZONE}', 'Africa/Cairo', CURRENT_TIMESTAMP()) BETWEEN sd.start_at AND sd.end_at
'''

# Result columns: product_id, sku_discount_pct, sku_disc_start, sku_disc_end.
df_sku_discounts = query_snowflake(SKU_DISCOUNT_QUERY)
print(f"  Loaded {len(df_sku_discounts)} active SKU discount records")


In [None]:
# =============================================================================
# QUERY 8: ACTIVE QUANTITY DISCOUNTS (QD)
# =============================================================================
# Lists the best quantity-discount percentage available per (product, tag):
#   * `active_qd`    - tiers of quantity discounts that are active and whose
#     [start_at, end_at] window contains the current Cairo time.
#   * `tag_products` - products attached to each dynamic tag (not deleted).
#   * final SELECT   - highest tier percentage per product and tag.
# The tier's threshold quantity is selected in the CTE but not returned.
print("Loading active quantity discounts...")

QD_QUERY = f'''
WITH active_qd AS (
    SELECT DISTINCT 
        qd.id AS discount_id,
        qd.dynamic_tag_id AS tag_id,
        qdt.threshold_quantity,
        qdt.discount_percentage AS qd_discount_pct
    FROM quantity_discounts qd 
    JOIN quantity_discount_tiers qdt ON qdt.quantity_discount_id = qd.id
    WHERE qd.active = TRUE
        AND CONVERT_TIMEZONE('{TIMEZONE}', 'Africa/Cairo', CURRENT_TIMESTAMP()) 
            BETWEEN qd.start_at AND qd.end_at
),
tag_products AS (
    SELECT DISTINCT
        dtp.dynamic_tag_id AS tag_id,
        dtp.product_id
    FROM dynamic_tag_products dtp
    WHERE dtp.deleted_at IS NULL
)
SELECT 
    tp.product_id,
    aq.tag_id,
    MAX(aq.qd_discount_pct) AS qd_discount_pct  -- Take max discount available
FROM active_qd aq
JOIN tag_products tp ON tp.tag_id = aq.tag_id
GROUP BY tp.product_id, aq.tag_id
'''

# Result columns: product_id, tag_id, qd_discount_pct.
df_qd_discounts = query_snowflake(QD_QUERY)
print(f"  Loaded {len(df_qd_discounts)} active QD records")


---
## Section 4: Today's Actions from Pricing Modules


In [None]:
# =============================================================================
# QUERY 9: TODAY'S ACTIONS FROM MODULE 3 (Periodic)
# =============================================================================
# Reads today's (Cairo date) rows from the periodic pricing push table and
# keeps only the most recent row per (product_id, warehouse_id). The two
# *_changed columns are 1 when a new value exists and differs from the
# current one, otherwise 0.
print("Loading Module 3 actions...")

MODULE3_ACTIONS_QUERY = f'''
SELECT 
    product_id, warehouse_id,
    price_action AS module3_price_action,
    CASE WHEN new_price IS NOT NULL AND new_price <> current_price THEN 1 ELSE 0 END AS module3_price_changed,
    CASE WHEN new_cart_rule IS NOT NULL AND new_cart_rule <> current_cart_rule THEN 1 ELSE 0 END AS module3_cart_changed,
    uth_status AS module3_uth_status,
    current_price AS module3_current_price,
    new_price AS module3_new_price,
    created_at AS module3_timestamp
FROM MATERIALIZED_VIEWS.pricing_periodic_push
WHERE created_at::DATE = CONVERT_TIMEZONE('{TIMEZONE}', 'Africa/Cairo', CURRENT_TIMESTAMP())::DATE
QUALIFY ROW_NUMBER() OVER (PARTITION BY product_id, warehouse_id ORDER BY created_at DESC) = 1
'''

# Best effort: any failure is reported and replaced by an empty frame so the
# rest of the notebook can still run without Module 3 data.
try:
    df_module3_actions = query_snowflake(MODULE3_ACTIONS_QUERY)
    print(f"  Loaded {len(df_module3_actions)} Module 3 action records")
except Exception as e:
    print(f"  Warning: Could not load Module 3 actions: {e}")
    df_module3_actions = pd.DataFrame(columns=['product_id', 'warehouse_id', 'module3_price_action'])


In [None]:
# =============================================================================
# QUERY 10: TODAY'S ACTIONS FROM MODULE 4 (Hourly)
# =============================================================================
# Reads today's (Cairo date) rows from the hourly pricing push table and keeps
# only the most recent row per (product_id, warehouse_id). The two *_changed
# columns are 1 when a new value exists and differs from the current one,
# otherwise 0.
print("Loading Module 4 actions...")

MODULE4_ACTIONS_QUERY = f'''
SELECT 
    product_id, warehouse_id,
    price_action AS module4_price_action,
    cart_rule_action AS module4_cart_action,
    CASE WHEN new_price IS NOT NULL AND new_price <> current_price THEN 1 ELSE 0 END AS module4_price_changed,
    CASE WHEN new_cart_rule IS NOT NULL AND new_cart_rule <> current_cart_rule THEN 1 ELSE 0 END AS module4_cart_changed,
    uth_qty_status AS module4_uth_qty_status,
    last_hour_qty_status AS module4_last_hour_status,
    current_price AS module4_current_price,
    new_price AS module4_new_price,
    created_at AS module4_timestamp
FROM MATERIALIZED_VIEWS.pricing_hourly_push
WHERE created_at::DATE = CONVERT_TIMEZONE('{TIMEZONE}', 'Africa/Cairo', CURRENT_TIMESTAMP())::DATE
QUALIFY ROW_NUMBER() OVER (PARTITION BY product_id, warehouse_id ORDER BY created_at DESC) = 1
'''

# Best effort: any failure is reported and replaced by an empty frame so the
# rest of the notebook can still run without Module 4 data.
try:
    df_module4_actions = query_snowflake(MODULE4_ACTIONS_QUERY)
    print(f"  Loaded {len(df_module4_actions)} Module 4 action records")
except Exception as e:
    print(f"  Warning: Could not load Module 4 actions: {e}")
    df_module4_actions = pd.DataFrame(columns=['product_id', 'warehouse_id', 'module4_price_action'])


---
## Section 5: Merge All Data


In [None]:
# =============================================================================
# MERGE ALL DATA INTO MASTER DATAFRAME
# =============================================================================
# Builds the master frame `df` with one row per (warehouse_id, product_id) by
# left-joining every dataset loaded above onto the product base. Missing
# values are replaced with explicit defaults right after each merge.
print("Merging all data...")

# Create warehouse mapping dataframe
df_warehouse = pd.DataFrame(
    WAREHOUSE_MAPPING,
    columns=['region', 'warehouse', 'warehouse_id', 'cohort_id']
)

# Start with product base and add warehouse mapping.
# Inner join: product rows whose cohort has no mapped warehouse are dropped,
# and each remaining row is repeated once per warehouse in its cohort.
df = df_product_base.merge(df_warehouse[['cohort_id', 'warehouse_id', 'warehouse']], on='cohort_id')
print(f"  After warehouse mapping: {len(df)} records")

# Merge stocks (no stock row means zero stock)
df = df.merge(df_stocks, on=['warehouse_id', 'product_id'], how='left')
df['stocks'] = df['stocks'].fillna(0)
print(f"  After stocks merge: {len(df)} records")

# Merge benchmarks (P80/P70)
df = df.merge(df_benchmarks, on=['warehouse_id', 'product_id'], how='left')
df['p80_daily_240d'] = pd.to_numeric(df['p80_daily_240d'], errors='coerce').fillna(5)  # Default to 5 if missing
df['std_daily_240d'] = pd.to_numeric(df['std_daily_240d'], errors='coerce').fillna(0)
# `avg_daily_240d` is tolerated as absent; a scalar fallback cannot be passed
# through `.fillna`, so the missing-column case is handled explicitly.
if 'avg_daily_240d' in df.columns:
    df['avg_daily_240d'] = pd.to_numeric(df['avg_daily_240d'], errors='coerce').fillna(0)
else:
    df['avg_daily_240d'] = 0
print(f"  After benchmarks merge: {len(df)} records")

# Merge yesterday's sales
df = df.merge(df_yesterday_sales, on=['warehouse_id', 'product_id'], how='left')
df['yesterday_qty'] = df['yesterday_qty'].fillna(0)
df['yesterday_nmv'] = df['yesterday_nmv'].fillna(0)
df['yesterday_retailers'] = df['yesterday_retailers'].fillna(0)
print(f"  After yesterday sales merge: {len(df)} records")

# Merge today's UTH
df = df.merge(df_today_uth, on=['warehouse_id', 'product_id'], how='left')
df['today_uth_qty'] = df['today_uth_qty'].fillna(0)
df['today_uth_nmv'] = df['today_uth_nmv'].fillna(0)
df['today_uth_retailers'] = df['today_uth_retailers'].fillna(0)
print(f"  After today UTH merge: {len(df)} records")

# Merge hourly distribution (for closing expectation)
df = df.merge(df_hourly_dist, on=['warehouse_id', 'cat'], how='left')
df['avg_uth_pct'] = pd.to_numeric(df['avg_uth_pct'], errors='coerce').fillna(0.5)  # Default to 50% if missing
print(f"  After hourly dist merge: {len(df)} records")

# Merge SKU discounts (aggregate by product_id to get max discount).
# A product can carry several overlapping active discounts; merging them
# un-aggregated would multiply rows and leave the final de-duplication to
# keep an arbitrary one. The maximum is used, matching the QD handling below.
# NOTE(review): confirm that "largest discount wins" is the business rule.
df_sku_agg = df_sku_discounts.groupby('product_id')['sku_discount_pct'].max().reset_index()
df = df.merge(df_sku_agg, on='product_id', how='left')
df['sku_discount_pct'] = df['sku_discount_pct'].fillna(0)
print(f"  After SKU discounts merge: {len(df)} records")

# Merge QD discounts (aggregate by product_id to get max discount)
df_qd_agg = df_qd_discounts.groupby('product_id')['qd_discount_pct'].max().reset_index()
df = df.merge(df_qd_agg, on='product_id', how='left')
df['qd_discount_pct'] = df['qd_discount_pct'].fillna(0)
print(f"  After QD merge: {len(df)} records")

# Merge Module 3 actions (when none were loaded, create the columns that the
# later cells read so they can run unchanged)
if not df_module3_actions.empty:
    df = df.merge(df_module3_actions, on=['warehouse_id', 'product_id'], how='left')
else:
    df['module3_price_action'] = None
    df['module3_price_changed'] = 0
    df['module3_cart_changed'] = 0
print(f"  After Module 3 actions merge: {len(df)} records")

# Merge Module 4 actions (same fallback as Module 3)
if not df_module4_actions.empty:
    df = df.merge(df_module4_actions, on=['warehouse_id', 'product_id'], how='left')
else:
    df['module4_price_action'] = None
    df['module4_price_changed'] = 0
    df['module4_cart_changed'] = 0
print(f"  After Module 4 actions merge: {len(df)} records")

# Remove duplicates: safety net for any source that still returns more than
# one row per key. Report it, because the row that is kept is arbitrary.
_rows_before_dedup = len(df)
df = df.drop_duplicates(subset=['warehouse_id', 'product_id'])
if len(df) < _rows_before_dedup:
    print(f"  Note: dropped {_rows_before_dedup - len(df)} duplicate warehouse/product rows")
print(f"\n‚úÖ Final merged dataframe: {len(df)} records")


---
## Section 6: Calculate Derived Metrics & Effective Price


In [None]:
# =============================================================================
# CALCULATE DERIVED METRICS
# =============================================================================
# Adds the derived columns used by the classification and reporting cells:
# stock value, running rate, days on hand, effective price/margin, projected
# closing quantity and the action/discount indicator flags.
print("Calculating derived metrics...")

# DOH reported for SKUs that hold stock but have no demand signal at all.
NO_DEMAND_DOH = 999

# 1. Stock Value (WAC1)
df['stock_value_wac1'] = df['stocks'] * df['wac1']

# 2. Days on Hand (DOH)
# Running rate = yesterday's units, falling back to the 240-day daily average
# when nothing was sold yesterday.
df['running_rate'] = df['yesterday_qty'].fillna(0)
_no_sales_yesterday = df['running_rate'] == 0
df.loc[_no_sales_yesterday, 'running_rate'] = df.loc[_no_sales_yesterday, 'avg_daily_240d'].fillna(1)

# A zero running rate must not be turned into a division by infinity: that
# yields DOH = 0 and hides exactly the SKUs that hold stock with no demand.
# Those rows get the sentinel instead, so the overstock check catches them;
# rows with neither stock nor demand keep DOH = 0.
_zero_rate = df['running_rate'] == 0
_has_stock = df['stocks'] > 0
df['doh'] = df['stocks'] / df['running_rate'].mask(_zero_rate)
df.loc[_zero_rate & _has_stock, 'doh'] = NO_DEMAND_DOH
df.loc[_zero_rate & ~_has_stock, 'doh'] = 0
df['doh'] = df['doh'].replace([np.inf, -np.inf], NO_DEMAND_DOH)

# 3. Effective Price (after all discounts)
# SKU and quantity discounts are summed as percentages and capped.
df['total_discount_pct'] = df['sku_discount_pct'] + df['qd_discount_pct']
df['total_discount_pct'] = df['total_discount_pct'].clip(upper=50)  # Cap at 50%
df['effective_price'] = df['current_price'] * (1 - df['total_discount_pct'] / 100)

# 4. Effective Margin (fraction of effective price, bounded to [-1, 1])
df['effective_margin'] = (df['effective_price'] - df['wac_p']) / df['effective_price']
df['effective_margin'] = df['effective_margin'].clip(lower=-1, upper=1)

# 5. Closing Expectation
# Projects today's full-day units from the up-till-hour units; a zero share
# is replaced by 0.5 to avoid dividing by zero.
df['closing_expectation'] = df['today_uth_qty'] / df['avg_uth_pct'].replace(0, 0.5)

# 6. Price/Cart Rule Changed Today
# Each flag column may be missing entirely or contain NaN after a left merge;
# both cases become 0.
for _flag in ('module3_price_changed', 'module4_price_changed',
              'module3_cart_changed', 'module4_cart_changed'):
    df[_flag] = df[_flag].fillna(0) if _flag in df.columns else 0

df['price_changed_today'] = ((df['module3_price_changed'] > 0) | (df['module4_price_changed'] > 0)).astype(int)
df['cart_rule_changed_today'] = ((df['module3_cart_changed'] > 0) | (df['module4_cart_changed'] > 0)).astype(int)

# 7. Has Active Discount
df['has_sku_discount'] = (df['sku_discount_pct'] > 0).astype(int)
df['has_qd_discount'] = (df['qd_discount_pct'] > 0).astype(int)
df['has_any_discount'] = ((df['sku_discount_pct'] > 0) | (df['qd_discount_pct'] > 0)).astype(int)

print("‚úÖ Derived metrics calculated")
print(f"  - Stock Value (WAC1): {df['stock_value_wac1'].sum():,.0f} EGP")
print(f"  - Avg DOH: {df['doh'].median():.1f} days (median)")
print(f"  - SKUs with discounts: {df['has_any_discount'].sum()}")


In [None]:
# =============================================================================
# BUCKET CLASSIFICATION LOGIC
# =============================================================================
print("Classifying SKUs into buckets...")

def classify_bucket(row, std_threshold=None):
    """
    Classify a SKU/warehouse row into a performance bucket.

    Rules are checked in this order:
      1. 'OOS'          - stock is zero or negative.
      2. 'Zero Demand'  - stock on hand but nothing sold yesterday.
      3. 'Above Target' - yesterday's qty is above target + k * std.
      4. 'Below Target' - yesterday's qty is below max(target - k * std, 1).
      5. 'On Track'     - anything in between.
    where target is the P80 daily benchmark and k is `std_threshold`.

    Args:
        row: mapping-like row providing 'stocks', 'yesterday_qty',
            'p80_daily_240d' and 'std_daily_240d'.
        std_threshold: width of the tolerance band in standard deviations.
            When omitted, the module-level STD_THRESHOLD is used.

    Returns:
        The bucket name as a string.
    """
    stocks = row['stocks']
    yesterday_qty = row['yesterday_qty']
    target = row['p80_daily_240d']
    std = row['std_daily_240d']

    # OOS first
    if stocks <= 0:
        return 'OOS'

    # Zero Demand: has stock but no sales yesterday
    if yesterday_qty == 0:
        return 'Zero Demand'

    # Resolve the default only when it is needed, so an explicit value never
    # depends on the module-level constant.
    if std_threshold is None:
        std_threshold = STD_THRESHOLD

    # Calculate thresholds using ±k std; the lower bound is floored at one
    # unit so that any sale at all is never "below target" for tiny targets.
    upper_bound = target + std_threshold * std
    lower_bound = max(target - std_threshold * std, 1)

    # Classify based on yesterday's performance vs target
    if yesterday_qty > upper_bound:
        return 'Above Target'
    if yesterday_qty < lower_bound:
        return 'Below Target'
    return 'On Track'

# Label every row with its performance bucket.
df['bucket'] = df.apply(classify_bucket, axis=1)

# Overstocked is a separate 0/1 flag (DOH above the threshold), not a bucket.
df['is_overstocked'] = df['doh'].gt(OVERSTOCKED_DOH_THRESHOLD).astype(int)

# Console summary: rows per bucket and overstocked count.
print("\n" + "=" * 60)
print("BUCKET DISTRIBUTION")
print("=" * 60)
print(df['bucket'].value_counts().to_string())
print(f"\nOverstocked SKUs (DOH > {OVERSTOCKED_DOH_THRESHOLD}): {df['is_overstocked'].sum()}")


In [None]:
# =============================================================================
# TREND ANALYSIS: Is performance improving?
# =============================================================================
print("Analyzing trends...")

def analyze_trend(row):
    """Return (trend label, improving flag) for one SKU/warehouse row.

    'Zero Demand' rows are judged on whether anything sold so far today and
    'OOS' rows get no trend. Every other row compares today's projected
    closing quantity with yesterday's full-day quantity: more than 10% above
    is 'Improving', more than 10% below is 'Declining', otherwise 'Stable'
    (which also counts as improving). Rows without a positive baseline are
    'No Baseline'.
    """
    status, baseline_qty, projected_qty, sold_so_far = (
        row[field]
        for field in ('bucket', 'yesterday_qty', 'closing_expectation', 'today_uth_qty')
    )

    # Zero Demand: has it started selling today?
    if status == 'Zero Demand':
        return ('Starting to Sell', True) if sold_so_far > 0 else ('Still Zero', False)

    # OOS: nothing to compare
    if status == 'OOS':
        return 'OOS', False

    # No positive quantity yesterday means there is nothing to compare against.
    if not (baseline_qty > 0):
        return 'No Baseline', False

    change_ratio = projected_qty / baseline_qty
    if change_ratio > 1.1:
        return 'Improving', True
    if change_ratio < 0.9:
        return 'Declining', False
    return 'Stable', True

# Run the trend rule on every row; column 0 is the label, column 1 the flag.
trend_results = df.apply(analyze_trend, axis=1, result_type='expand')
df['trend_status'], df['is_improving'] = trend_results[0], trend_results[1].astype(int)

# Price direction is only differentiated for Above Target rows. A price
# change today takes precedence over "no discount"; everything else is
# 'Stable'.
above_target_mask = df['bucket'] == 'Above Target'
df['price_direction'] = np.select(
    [
        above_target_mask & (df['price_changed_today'] == 1),
        above_target_mask & (df['total_discount_pct'] == 0),
    ],
    ['Price Changed', 'No Discount'],
    default='Stable',
)

print("\n" + "=" * 60)
print("TREND DISTRIBUTION")
print("=" * 60)
print(df['trend_status'].value_counts().to_string())
print(f"\nSKUs Improving: {df['is_improving'].sum()} ({df['is_improving'].mean()*100:.1f}%)")


In [None]:
# =============================================================================
# SUMMARY DASHBOARD BY BUCKET
# =============================================================================
print("\n" + "=" * 80)
print("STOCK HEALTH DASHBOARD - SUMMARY")
print(f"Generated: {CAIRO_NOW:%Y-%m-%d %H:%M:%S} Cairo")
print("=" * 80)

# One row per bucket. Each output column is declared next to its source
# column and aggregation; the column order is relied on by the export cell.
bucket_summary = df.groupby('bucket').agg(
    sku_count=('product_id', 'count'),
    total_stock_value=('stock_value_wac1', 'sum'),
    total_stock_units=('stocks', 'sum'),
    median_doh=('doh', 'median'),
    total_yesterday_qty=('yesterday_qty', 'sum'),
    total_today_uth=('today_uth_qty', 'sum'),
    total_closing_exp=('closing_expectation', 'sum'),
    skus_price_action=('price_changed_today', 'sum'),
    skus_cart_action=('cart_rule_changed_today', 'sum'),
    skus_with_discount=('has_any_discount', 'sum'),
    skus_improving=('is_improving', 'sum'),
    avg_effective_margin=('effective_margin', 'mean'),
    skus_overstocked=('is_overstocked', 'sum'),
)

# Shares of each bucket's SKUs, in percent; the margin is converted from a
# fraction to a percentage.
bucket_summary['pct_improving'] = (
    bucket_summary['skus_improving'].div(bucket_summary['sku_count']).mul(100).round(1)
)
bucket_summary['pct_with_discount'] = (
    bucket_summary['skus_with_discount'].div(bucket_summary['sku_count']).mul(100).round(1)
)
bucket_summary['avg_effective_margin'] = bucket_summary['avg_effective_margin'].mul(100).round(2)

print("\nBUCKET SUMMARY:")
print("-" * 80)
display(bucket_summary.round(2))


In [None]:
# =============================================================================
# PREPARE OUTPUT DATAFRAMES FOR EXCEL
# =============================================================================
print("Preparing Excel export...")

# Sheet 1: SKU Details - preferred column order. Columns that do not exist in
# `df` (for example module columns when no actions were loaded) are skipped.
detail_columns = [
    column
    for column in (
        'warehouse_id', 'warehouse', 'region', 'cohort_id', 'product_id', 'sku', 'brand', 'cat',
        'stocks', 'stock_value_wac1', 'doh', 'is_overstocked',
        'wac1', 'wac_p',
        'current_price', 'effective_price', 'effective_margin',
        'sku_discount_pct', 'qd_discount_pct', 'total_discount_pct',
        'p80_daily_240d', 'std_daily_240d', 'avg_daily_240d',
        'yesterday_qty', 'yesterday_nmv', 'yesterday_retailers',
        'today_uth_qty', 'today_uth_nmv', 'today_uth_retailers', 'closing_expectation',
        'bucket', 'trend_status', 'is_improving', 'price_direction',
        'price_changed_today', 'cart_rule_changed_today',
        'has_sku_discount', 'has_qd_discount', 'has_any_discount',
        'module3_price_action', 'module3_uth_status',
        'module4_price_action', 'module4_uth_qty_status',
    )
    if column in df.columns
]

# Display priority of the buckets in the sheet (lower comes first).
bucket_order = {'Zero Demand': 0, 'Below Target': 1, 'On Track': 2, 'Above Target': 3, 'OOS': 4}

# Order rows by bucket priority, then by descending stock value; the helper
# sort column is removed again afterwards.
df_details = df[detail_columns].copy()
df_details['bucket_order'] = df_details['bucket'].map(bucket_order)
df_details = (
    df_details
    .sort_values(['bucket_order', 'stock_value_wac1'], ascending=[True, False])
    .drop('bucket_order', axis=1)
)

print(f"  SKU Details: {df_details.shape[0]} rows, {df_details.shape[1]} columns")


In [None]:
# =============================================================================
# PREPARE BUCKET SUMMARY FOR EXCEL (Sheet 2)
# =============================================================================
# Human-readable headers for the summary sheet, listed in the same order as
# the columns of `bucket_summary` once the bucket index becomes a column.
_summary_headers = [
    'Bucket',
    'SKU Count',
    'Stock Value (EGP)',
    'Total Stock Units',
    'Median DOH',
    'Yesterday Qty',
    'Today UTH Qty',
    'Closing Expectation',
    'Price Actions',
    'Cart Actions',
    'With Discount',
    'Improving',
    'Avg Margin %',
    'Overstocked',
    '% Improving',
    '% With Discount',
]

bucket_summary_export = bucket_summary.reset_index()
bucket_summary_export.columns = _summary_headers

print(f"  Bucket Summary: {len(bucket_summary_export)} rows")
display(bucket_summary_export)


In [None]:
# =============================================================================
# EXPORT TO EXCEL
# =============================================================================
print(f"\nExporting to Excel: {OUTPUT_FILE}")

# Sheets are written in this order: SKU details first, bucket summary second.
_sheets = (
    ('SKU_Details', df_details),
    ('Bucket_Summary', bucket_summary_export),
)

with pd.ExcelWriter(OUTPUT_FILE, engine='openpyxl') as writer:
    for _sheet_name, _sheet_frame in _sheets:
        _sheet_frame.to_excel(writer, sheet_name=_sheet_name, index=False)
        print(f"  ‚úÖ Sheet '{_sheet_name}' written: {len(_sheet_frame)} rows")

print("\n" + "=" * 60)
print(f"‚úÖ EXPORT COMPLETE: {OUTPUT_FILE}")
print("=" * 60)


In [None]:
# =============================================================================
# FINAL SUMMARY
# =============================================================================
# Closing recap of the run: totals, bucket mix, alerts and positive signals.
_bucket_counts = df['bucket'].value_counts()
_total_skus = len(df)
_improving = df['is_improving']

print("\n" + "=" * 80)
print("STOCK HEALTH DASHBOARD - COMPLETE")
print("=" * 80)
print(f"\nüìÖ Date: {TODAY}")
print(f"‚è∞ Generated: {datetime.now(CAIRO_TZ):%Y-%m-%d %H:%M:%S} Cairo")
print("\nüìä OVERALL STATS:")
print(f"  Total SKUs Analyzed: {_total_skus:,}")
print(f"  Total Stock Value: {df['stock_value_wac1'].sum():,.0f} EGP")
print(f"  Total Stock Units: {df['stocks'].sum():,.0f}")

print("\nüì¶ BUCKET DISTRIBUTION:")
for bucket, count in _bucket_counts.items():
    pct = count / _total_skus * 100
    print(f"  {bucket}: {count:,} ({pct:.1f}%)")

print("\n‚ö†Ô∏è ALERTS:")
print(f"  Overstocked SKUs (DOH > {OVERSTOCKED_DOH_THRESHOLD}): {df['is_overstocked'].sum()}")
print(f"  Zero Demand with Stock: {_bucket_counts.get('Zero Demand', 0)}")
print(f"  Below Target: {_bucket_counts.get('Below Target', 0)}")

print("\n‚úÖ POSITIVE SIGNALS:")
print(f"  SKUs Improving: {_improving.sum()} ({_improving.mean()*100:.1f}%)")
print(f"  Above Target: {_bucket_counts.get('Above Target', 0)}")
print(f"  On Track: {_bucket_counts.get('On Track', 0)}")

print(f"\nüìÅ OUTPUT: {OUTPUT_FILE}")
