# Quantity Discount (QD) Pricing System

This notebook calculates tiered pricing and quantities for products across warehouses.

## Workflow:
1. **Setup** - Imports, connections, and configuration
2. **Product Selection** - Select top products per warehouse based on performance
3. **Quantity Tiers** - Calculate tier 1 and tier 2 quantities based on order history
4. **Market Prices** - Gather competitive pricing data
5. **Price Tiers** - Calculate discounted prices for each tier
6. **Wholesale Pricing** - Calculate wholesale prices for bulk orders
7. **Export** - Save results to Excel


## 1. Setup & Imports


In [1]:
%%capture

# =============================================================================
# Package Installation
# =============================================================================

# Core
!pip install --upgrade pip

# Database Connectivity
!pip install psycopg2-binary
!pip install snowflake-connector-python==3.15.0
!pip install snowflake-sqlalchemy
!pip install sqlalchemy==1.4.46

# AWS & API
!pip install boto3
!pip install requests
!pip install keyring==23.11.0

# Google Sheets
!pip install oauth2client
!pip install gspread==5.9.0
!pip install gspread_dataframe
!pip install google.cloud

# Data Manipulation
!pip install pandas==2.2.1
!pip install numpy
!pip install polars
!pip install openpyxl
!pip install xlsxwriter

# Utilities
!pip install tqdm
!pip install warnings
!pip install --upgrade datetime
!pip install python-time
!pip install --upgrade pytz
!pip install db-dtypes
!pip install import-ipynb

# Analytics
!pip install statsmodels
!pip install scikit-learn
!pip install pulp

In [2]:

from google.oauth2.service_account import Credentials

In [3]:
# =============================================================================
# STANDARD LIBRARY IMPORTS
# =============================================================================
import os
import json
import time
import base64
import calendar
import warnings
from pathlib import Path
from datetime import datetime, date, timedelta

# =============================================================================
# THIRD-PARTY IMPORTS
# =============================================================================
import numpy as np
import pandas as pd
import pytz
import requests
import gspread
import boto3
import snowflake.connector
from tqdm import tqdm
from requests import get
from botocore.exceptions import ClientError
from google.oauth2.service_account import Credentials

# =============================================================================
# LOCAL IMPORTS & ENVIRONMENT SETUP
# =============================================================================
import setup_environment_2
import importlib
import import_ipynb

warnings.filterwarnings("ignore")
importlib.reload(setup_environment_2)
setup_environment_2.initialize_env()

  warn_incompatible_dep(


/home/ec2-user/.Renviron
/home/ec2-user/service_account_key.json


### Configuration Constants


In [4]:
# =============================================================================
# CONFIGURATION - Modify these parameters as needed
# =============================================================================

# -----------------------------------------------------------------------------
# Cohort IDs for QD program
# -----------------------------------------------------------------------------
COHORT_IDS = [700, 701, 702, 703, 704, 1123, 1124, 1125, 1126]

# -----------------------------------------------------------------------------
# Warehouse mappings: (region, warehouse_name, warehouse_id, cohort_id)
# -----------------------------------------------------------------------------
WAREHOUSE_MAPPING = [
    ('Cairo',       'Mostorod',      1,   700),
    ('Giza',        'Barageel',      236, 701),
    ('Giza',        'Sakkarah',      962, 701),
    ('Delta West',  'El-Mahala',     337, 703),
    ('Delta West',  'Tanta',         8,   703),
    ('Delta East',  'Mansoura FC',   339, 704),
    ('Delta East',  'Sharqya',       170, 704),
    ('Upper Egypt', 'Assiut FC',     501, 1124),
    ('Upper Egypt', 'Bani sweif',    401, 1126),
    ('Upper Egypt', 'Menya Samalot', 703, 1123),
    ('Upper Egypt', 'Sohag',         632, 1125),
    ('Alexandria',  'Khorshed Alex', 797, 702),
]

# Excluded warehouse IDs
EXCLUDED_WAREHOUSES = [6, 9, 10]

# Products to exclude from selection
PRODUCTS_TO_REMOVE = [7630,589]
CATS_TO_REMOVE = []#['مرقة وخلطات','صلصة و صوص','أرز','بقوليات','مكرونة','مكرونة سايب','زيوت','بقوليات و حبوب سايب','سمنة','ارز سايب','سمنة سايب','بهارات سايب','ياميش']
BRANDS_TO_REMOVE = ['بيتي عصاير']
# -----------------------------------------------------------------------------
# Pricing Parameters
# -----------------------------------------------------------------------------
MAX_DISCOUNT_PCT = 5.0    # Maximum discount allowed from current price (%)
MIN_DISCOUNT_PCT = 0.35   # Minimum discount required from current price (%)
MIN_RATIO        = 1.05    # Minimum discount-to-quantity ratio
MAX_RATIO        = 3      # Maximum discount-to-quantity ratio

# -----------------------------------------------------------------------------
# Product Selection Thresholds
# -----------------------------------------------------------------------------
MIN_ORDERS    = 20    # Minimum orders in 4 months
MIN_RETAILERS = 5     # Minimum unique retailers
MIN_NMV       = 5000  # Minimum revenue (EGP)
MIN_VELOCITY  = 0.5   # Minimum units per day

# -----------------------------------------------------------------------------
# Ranking Parameters
# -----------------------------------------------------------------------------
TOP_PRODUCTS_PER_WAREHOUSE   = 200  # Initial selection
FINAL_PRODUCTS_PER_WAREHOUSE = 133  # Final output

# -----------------------------------------------------------------------------
# Delivery Fees
# -----------------------------------------------------------------------------
DELIVERY_FEE_CAIRO_GIZA = 25
DELIVERY_FEE_OTHER      = 20

print("✓ Configuration loaded successfully!")


✓ Configuration loaded successfully!


### Functions

In [5]:
def get_secret(secret_name):
    """
    Retrieve secret from AWS Secrets Manager.
    
    Args:
        secret_name: Name/ID of the secret to retrieve
        
    Returns:
        Secret string or decoded binary
    """
    region_name = "us-east-1"
    
    # Create a Secrets Manager client
    session = boto3.session.Session()
    client = session.client(
        service_name='secretsmanager',
        region_name=region_name
    )
    
    try:
        get_secret_value_response = client.get_secret_value(SecretId=secret_name)
    except ClientError as e:
        # Re-raise all AWS Secrets Manager exceptions
        raise e
    
    # Return decrypted secret (string or binary)
    if 'SecretString' in get_secret_value_response:
        return get_secret_value_response['SecretString']
    else:
        return base64.b64decode(get_secret_value_response['SecretBinary'])

In [6]:
# Load API credentials from AWS Secrets Manager
pricing_api_secret = json.loads(get_secret("prod/pricing/api/"))
username = pricing_api_secret["egypt_username"]
password = pricing_api_secret["egypt_password"]
secret   = pricing_api_secret["egypt_secret"]

print("✓ API credentials loaded")

✓ API credentials loaded


In [7]:
def get_access_token(url, client_id, client_secret):
    """
    Get OAuth access token for MaxAB APIs.
    
    Args:
        url: Token endpoint URL
        client_id: OAuth client ID
        client_secret: OAuth client secret
        
    Returns:
        Access token string
    """
    response = requests.post(
        url,
        data={
            "grant_type": "password",
            "username": username,
            "password": password
        },
        auth=(client_id, client_secret),
    )
    return response.json()["access_token"]

In [8]:
def post_QD(file_name):
    """
    Upload Quantity Discount file to MaxAB API.
    
    Args:
        file_name: Path to the Excel file to upload
        
    Returns:
        API response object
    """
    token = get_access_token(
        'https://sso.maxab.info/auth/realms/maxab/protocol/openid-connect/token',
        'main-system-externals',
        secret
    )
    
    url = "https://api.maxab.info/commerce/api/admins/v1/quantity-discounts"
    
    files = [
        ('file', (file_name, open(file_name, 'rb'), 
                  'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'))
    ]
    headers = {'Authorization': f'bearer {token}'}
    
    response = requests.request("POST", url, headers=headers, data={}, files=files)
    return response

In [9]:
def post_cart_rules(cohort_id, file_name):
    """
    Upload Cart Rules file for a specific cohort.
    
    Args:
        cohort_id: ID of the cohort to update
        file_name: Path to the Excel file to upload
        
    Returns:
        API response object
    """
    token = get_access_token(
        'https://sso.maxab.info/auth/realms/maxab/protocol/openid-connect/token',
        'main-system-externals',
        secret
    )
    
    url = f"https://api.maxab.info/main-system/api/admin-portal/cohorts/{cohort_id}/cart-rules"
    
    files = [
        ('sheet', (file_name, open(file_name, 'rb'),
                   'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'))
    ]
    headers = {'Authorization': f'bearer {token}'}
    
    response = requests.request("POST", url, headers=headers, data={}, files=files)
    return response

### Database Connection Function


In [10]:
def snowflake_query(country, query, warehouse=None, columns=[], conn=None):
    """
    Execute a query against Snowflake and return results as DataFrame.
    
    Args:
        country: Country identifier (e.g., "Egypt")
        query: SQL query string to execute
        warehouse: Snowflake warehouse (optional)
        columns: Custom column names (optional)
        conn: Existing connection (optional)
        
    Returns:
        pandas DataFrame with query results
    """
    con = snowflake.connector.connect(
        user     = os.environ["SNOWFLAKE_USERNAME"],
        account  = os.environ["SNOWFLAKE_ACCOUNT"],
        password = os.environ["SNOWFLAKE_PASSWORD"],
        database = os.environ["SNOWFLAKE_DATABASE"]
    )

    try:
        cur = con.cursor()
        cur.execute("USE WAREHOUSE COMPUTE_WH")
        cur.execute(query)
        
        column_names = [col[0] for col in cur.description]
        results = cur.fetchall()
        
        if not results:
            out = pd.DataFrame(columns=[name.lower() for name in column_names])
        else:
            if len(columns) == 0:
                out = pd.DataFrame(np.array(results), columns=column_names)
                out.columns = out.columns.str.lower()
            else:
                out = pd.DataFrame(np.array(results), columns=columns)
                out.columns = out.columns.str.lower()
        
        return out
        
    except Exception as e:
        print(f"❌ Query error: {e}")
        raise
        
    finally:
        cur.close()
        con.close()

In [11]:
# Get Snowflake timezone for consistent date/time handling
query = "SHOW PARAMETERS LIKE 'TIMEZONE'"
timezone_result = snowflake_query("Egypt", query)
zone_to_use = timezone_result['value'].values[0]
print(f"✓ Using timezone: {zone_to_use}")

✓ Using timezone: America/Los_Angeles


### Feedback Loop - Previous QD Cycle Performance


In [12]:
# =============================================================================
# FEEDBACK LOOP: Get recommendations from previous QD cycle
# =============================================================================

feedback_query = '''
-- ============================================================
-- QD FEEDBACK LOOP QUERY
-- Based on PREVIOUS QD cycle performance
-- ============================================================

WITH warehouse_mapping AS (
    SELECT * 
    FROM (VALUES
        ('Assiut FC', 501, 3301),
        ('Bani sweif', 401, 3302),
        ('Barageel', 236, 3303),
        ('El-Mahala', 337, 3304),
        ('Khorshed Alex', 797, 3305),
        ('Mansoura FC', 339, 3306),
        ('Menya Samalot', 703, 3307),
        ('Mostorod', 1, 3308),
        ('Sakkarah', 962, 3309),
        ('Sharqya', 170, 3310),
        ('Sohag', 632, 3311),
        ('Tanta', 8, 3312)
    ) AS x(warehouse_name, warehouse_id, tag_id)
), 

-- Get the PREVIOUS (completed) QD cycle
previous_qd_cycle AS (
    SELECT 
        qd.id AS qd_id,
        qd.start_at AS start_at,
        qd.end_at AS end_at,
        qd.dynamic_tag_id,
        RANK() OVER (PARTITION BY qd.dynamic_tag_id ORDER BY qd.start_at desc ,qd.end_at DESC) AS cycle_rank
    FROM quantity_discounts qd
    WHERE qd.start_at::date < current_date
	and qd.start_at::date >= current_date - interval '10 days'
	and dynamic_tag_id >3300
    QUALIFY cycle_rank = 1
),

qd_products AS (
    SELECT 
        warehouse_id,
        product_id,
        sku,
        packing_unit_id,
        qd_id,
        MAX(CASE WHEN tier = 1 THEN quantity END) AS tier_1_qty,
        MAX(CASE WHEN tier = 1 THEN discount_percentage END) AS tier_1_discount_pct,
        MAX(CASE WHEN tier = 2 THEN quantity END) AS tier_2_qty,
        MAX(CASE WHEN tier = 2 THEN discount_percentage END) AS tier_2_discount_pct,
        MAX(CASE WHEN tier = 3 THEN quantity END) AS tier_3_qty,
        MAX(CASE WHEN tier = 3 THEN discount_percentage END) AS tier_3_discount_pct,
        start_at,
        end_at
    FROM (
        SELECT 
            wm.warehouse_id,
            qd.id AS qd_id,
            qdv.product_id,
            CONCAT(p.name_ar, ' ', p.size, ' ', product_units.name_ar) AS sku,
            qdv.packing_unit_id,
            qdv.quantity,
            qdv.discount_percentage,
            qd.start_at,
            qd.end_at,
            ROW_NUMBER() OVER (
                PARTITION BY qdv.product_id, qdv.packing_unit_id, qd.id 
                ORDER BY qdv.quantity
            ) AS tier
        FROM quantity_discounts qd 
        JOIN quantity_discount_values qdv ON qd.id = qdv.quantity_discount_id 
        JOIN warehouse_mapping wm ON wm.tag_id = qd.dynamic_tag_id
        JOIN products p ON p.id = qdv.product_id 
        JOIN product_units ON product_units.id = p.unit_id 
        JOIN previous_qd_cycle pqd ON qd.id = pqd.qd_id
    )
    GROUP BY ALL
),

excluded_rets AS (
    SELECT dta.TAGGABLE_ID AS retailer_id
    FROM DYNAMIC_TAGS dt 
    JOIN dynamic_taggables dta ON dt.id = dta.dynamic_tag_id 
    WHERE dt.name LIKE '%whole_sale%'
        AND dt.id > 3000
),

orders_with_qd AS (
    SELECT 
        w.id AS warehouse_id,
        w.name AS warehouse,
        pso.product_id,
        sku,
        pso.packing_unit_id,
        pso.purchased_item_count AS qty,
        pso.total_price AS nmv,
        pso.basic_unit_count,
        pso.item_quantity_discount_value AS qd_discount_per_item,
        COALESCE(f.wac_p, 0) AS wac,
        
        -- Tier thresholds from previous cycle
        qd.tier_1_qty,
        qd.tier_2_qty,
        qd.tier_3_qty,
        qd.tier_1_discount_pct,
        qd.tier_2_discount_pct,
        qd.tier_3_discount_pct,
        
        -- Determine which tier was used
        CASE 
            WHEN pso.item_quantity_discount_value = 0 THEN 'Base'
            WHEN pso.purchased_item_count >= COALESCE(qd.tier_3_qty, 999999) AND qd.tier_3_qty IS NOT NULL THEN 'Tier 3'
            WHEN pso.purchased_item_count >= COALESCE(qd.tier_2_qty, 999999) AND qd.tier_2_qty IS NOT NULL THEN 'Tier 2'
            WHEN pso.purchased_item_count >= COALESCE(qd.tier_1_qty, 999999) THEN 'Tier 1'
            ELSE 'Base'
        END AS tier_used,
        
        -- Total discount
        pso.item_quantity_discount_value * pso.purchased_item_count AS total_qd_discount,
        
        -- Near-miss flags for T1
        CASE WHEN qd.tier_1_qty IS NOT NULL AND qty >= qd.tier_1_qty * 0.8 AND qty < qd.tier_1_qty THEN 1 ELSE 0 END AS near_miss_t1,
        CASE WHEN qd.tier_1_qty IS NOT NULL AND qty >= qd.tier_1_qty * 0.9 AND qty < qd.tier_1_qty THEN 1 ELSE 0 END AS very_close_t1,
        
        -- Near-miss flags for T2 (among T1 achievers)
        CASE WHEN qd.tier_2_qty IS NOT NULL AND qty >= qd.tier_1_qty AND qty >= qd.tier_2_qty * 0.8 AND qty < qd.tier_2_qty THEN 1 ELSE 0 END AS near_miss_t2,
        CASE WHEN qd.tier_2_qty IS NOT NULL AND qty >= qd.tier_1_qty AND qty >= qd.tier_2_qty * 0.9 AND qty < qd.tier_2_qty THEN 1 ELSE 0 END AS very_close_t2,
        
        -- Near-miss flags for T3 (among T2 achievers)
        CASE WHEN qd.tier_3_qty IS NOT NULL AND qty >= qd.tier_2_qty AND qty >= qd.tier_3_qty * 0.8 AND qty < qd.tier_3_qty THEN 1 ELSE 0 END AS near_miss_t3,
        CASE WHEN qd.tier_3_qty IS NOT NULL AND qty >= qd.tier_2_qty AND qty >= qd.tier_3_qty * 0.9 AND qty < qd.tier_3_qty THEN 1 ELSE 0 END AS very_close_t3
        
    FROM product_sales_order pso
    JOIN sales_orders so ON so.id = pso.sales_order_id 
    JOIN warehouses w ON w.id = pso.warehouse_id
    JOIN qd_products qd ON qd.product_id = pso.product_id 
        AND qd.packing_unit_id = pso.packing_unit_id 
        AND qd.warehouse_id = pso.warehouse_id
    JOIN finance.all_cogs f ON f.product_id = pso.product_id 
        AND so.created_at BETWEEN f.from_date AND f.to_date
    WHERE so.created_at BETWEEN (SELECT MIN(start_at) FROM previous_qd_cycle) AND (SELECT MAX(end_at) FROM previous_qd_cycle)
        AND so.sales_order_status_id NOT IN (7, 12)
        AND pso.purchased_item_count > 0 
        AND so.retailer_id NOT IN (SELECT retailer_id FROM excluded_rets)
),

performance_metrics AS (
    SELECT 
        warehouse_id,
        warehouse,
        product_id,
        sku,
        packing_unit_id,
        
        -- Current tier configuration
        MAX(tier_1_qty) AS prev_tier_1_qty,
        MAX(tier_1_discount_pct) AS prev_tier_1_discount,
        MAX(tier_2_qty) AS prev_tier_2_qty,
        MAX(tier_2_discount_pct) AS prev_tier_2_discount,
        MAX(tier_3_qty) AS prev_tier_3_qty,
        MAX(tier_3_discount_pct) AS prev_tier_3_discount,
        
        -- Order counts (for reference)
        COUNT(*) AS total_orders,
        COUNT(CASE WHEN tier_used = 'Base' THEN 1 END) AS base_orders,
        COUNT(CASE WHEN tier_used = 'Tier 1' THEN 1 END) AS t1_orders,
        COUNT(CASE WHEN tier_used = 'Tier 2' THEN 1 END) AS t2_orders,
        COUNT(CASE WHEN tier_used = 'Tier 3' THEN 1 END) AS t3_orders,
        
        -- NMV by tier
        SUM(nmv) AS total_nmv,
        SUM(CASE WHEN tier_used = 'Base' THEN nmv ELSE 0 END) AS base_nmv,
        SUM(CASE WHEN tier_used = 'Tier 1' THEN nmv ELSE 0 END) AS t1_nmv,
        SUM(CASE WHEN tier_used = 'Tier 2' THEN nmv ELSE 0 END) AS t2_nmv,
        SUM(CASE WHEN tier_used = 'Tier 3' THEN nmv ELSE 0 END) AS t3_nmv,
        
        -- CONVERSION RATES (NMV-based)
        ROUND(SUM(CASE WHEN tier_used = 'Base' THEN nmv ELSE 0 END) * 100.0 / NULLIF(SUM(nmv), 0), 1) AS base_pct,
        ROUND(SUM(CASE WHEN tier_used = 'Tier 1' THEN nmv ELSE 0 END) * 100.0 / NULLIF(SUM(nmv), 0), 1) AS t1_conversion_pct,
        ROUND(SUM(CASE WHEN tier_used = 'Tier 2' THEN nmv ELSE 0 END) * 100.0 / NULLIF(SUM(nmv), 0), 1) AS t2_conversion_pct,
        ROUND(SUM(CASE WHEN tier_used = 'Tier 3' THEN nmv ELSE 0 END) * 100.0 / NULLIF(SUM(nmv), 0), 1) AS t3_conversion_pct,
        
        -- Near-miss NMV
        SUM(CASE WHEN near_miss_t1 = 1 THEN nmv ELSE 0 END) AS near_miss_t1_nmv,
        SUM(CASE WHEN near_miss_t2 = 1 THEN nmv ELSE 0 END) AS near_miss_t2_nmv,
        SUM(CASE WHEN near_miss_t3 = 1 THEN nmv ELSE 0 END) AS near_miss_t3_nmv,
        
        -- Near-miss as % of potential NMV
        ROUND(SUM(CASE WHEN near_miss_t1 = 1 THEN nmv ELSE 0 END) * 100.0 
            / NULLIF(SUM(CASE WHEN tier_used = 'Base' THEN nmv ELSE 0 END), 0), 1) AS near_miss_t1_pct,
        ROUND(SUM(CASE WHEN near_miss_t2 = 1 THEN nmv ELSE 0 END) * 100.0 
            / NULLIF(SUM(CASE WHEN tier_used = 'Tier 1' THEN nmv ELSE 0 END), 0), 1) AS near_miss_t2_pct,
        ROUND(SUM(CASE WHEN near_miss_t3 = 1 THEN nmv ELSE 0 END) * 100.0 
            / NULLIF(SUM(CASE WHEN tier_used = 'Tier 2' THEN nmv ELSE 0 END), 0), 1) AS near_miss_t3_pct,
        
        -- Quantity analysis
        ROUND(AVG(qty), 1) AS avg_qty,
        MEDIAN(qty) AS median_qty,
        
        -- Financial
        SUM(qty * wac * basic_unit_count) AS total_cogs,
        SUM(COALESCE(total_qd_discount, 0)) AS total_discount,
        
        -- Blended margin
        ROUND((SUM(nmv) - SUM(qty * wac * basic_unit_count) - SUM(COALESCE(total_qd_discount, 0))) * 100.0 
            / NULLIF(SUM(nmv), 0), 2) AS blended_margin_pct,
        
        -- Margin by tier
        ROUND(SUM(CASE WHEN tier_used = 'Base' THEN nmv - (qty * wac * basic_unit_count) END) * 100.0
            / NULLIF(SUM(CASE WHEN tier_used = 'Base' THEN nmv END), 0), 2) AS base_margin_pct,
        ROUND(SUM(CASE WHEN tier_used = 'Tier 1' THEN nmv - (qty * wac * basic_unit_count) - total_qd_discount END) * 100.0
            / NULLIF(SUM(CASE WHEN tier_used = 'Tier 1' THEN nmv END), 0), 2) AS t1_margin_pct,
        ROUND(SUM(CASE WHEN tier_used = 'Tier 2' THEN nmv - (qty * wac * basic_unit_count) - total_qd_discount END) * 100.0
            / NULLIF(SUM(CASE WHEN tier_used = 'Tier 2' THEN nmv END), 0), 2) AS t2_margin_pct,
        ROUND(SUM(CASE WHEN tier_used = 'Tier 3' THEN nmv - (qty * wac * basic_unit_count) - total_qd_discount END) * 100.0
            / NULLIF(SUM(CASE WHEN tier_used = 'Tier 3' THEN nmv END), 0), 2) AS t3_margin_pct
            
    FROM orders_with_qd
    GROUP BY ALL
    HAVING total_orders >= 5
),

with_targets AS (
    SELECT 
        pm.*,
        b.name_ar AS brand,
        c.name_ar AS cat,
        COALESCE(AVG(ct.margin), 0.05) AS target_margin
    FROM performance_metrics pm
    JOIN products p ON p.id = pm.product_id
    JOIN brands b ON b.id = p.brand_id
    JOIN categories c ON c.id = p.category_id
    LEFT JOIN performance.commercial_targets ct 
        ON ct.cat = c.name_ar 
        AND ct.brand = b.name_ar 
        AND ct.date >= DATE_TRUNC('month', CURRENT_DATE)
    GROUP BY ALL
),

feedback_recommendations AS (
    SELECT 
        *,
        
        -- Margin health indicator
        CASE 
            WHEN blended_margin_pct < target_margin * 100 * 0.90 THEN 'CRITICAL'
            WHEN blended_margin_pct < target_margin * 100 * 0.95 THEN 'LOW'
            WHEN blended_margin_pct > target_margin * 100 * 1.10 THEN 'HIGH'
            ELSE 'HEALTHY'
        END AS margin_status,
        
        -- TIER 1 RECOMMENDATIONS
        CASE 
            WHEN t1_conversion_pct > 20 AND blended_margin_pct < target_margin * 100 * 0.95 
                THEN 'INCREASE_QTY'
            WHEN t1_conversion_pct < 8 AND blended_margin_pct > target_margin * 100 AND near_miss_t1_pct > 15 
                THEN 'DECREASE_QTY'
            WHEN t1_conversion_pct < 5 AND blended_margin_pct > target_margin * 100 * 1.05 AND near_miss_t1_pct <= 15 
                THEN 'INCREASE_DISCOUNT'
            WHEN t1_conversion_pct > 25 AND blended_margin_pct >= target_margin * 100 * 0.95 
                THEN 'SLIGHT_INCREASE_QTY'
            ELSE 'NO_CHANGE'
        END AS t1_action,
        
        CASE 
            WHEN t1_conversion_pct > 20 AND blended_margin_pct < target_margin * 100 * 0.95 
                THEN CEIL(prev_tier_1_qty * 1.20)
            WHEN t1_conversion_pct < 8 AND blended_margin_pct > target_margin * 100 AND near_miss_t1_pct > 15 
                THEN CEIL(prev_tier_1_qty * 0.9)
            WHEN t1_conversion_pct > 25 AND blended_margin_pct >= target_margin * 100 * 0.95 
                THEN CEIL(prev_tier_1_qty * 1.10)
            ELSE prev_tier_1_qty
        END AS suggested_t1_qty,
        
        CASE 
            WHEN blended_margin_pct < target_margin * 100 * 0.90 
                THEN GREATEST(prev_tier_1_discount - 0.25, 0.2)
            WHEN t1_conversion_pct < 5 AND blended_margin_pct > target_margin * 100 * 1.05 
                THEN LEAST(prev_tier_1_discount + 0.25, 4.0)
            ELSE prev_tier_1_discount
        END AS suggested_t1_discount,
        
        -- TIER 2 RECOMMENDATIONS
        CASE 
            WHEN t2_conversion_pct > 15 AND t2_margin_pct < target_margin * 100 * 0.80 
                THEN 'INCREASE_QTY'
            WHEN t1_conversion_pct > 10 AND t2_conversion_pct < 3 AND near_miss_t2_pct > 20 
                THEN 'DECREASE_QTY'
            WHEN t1_conversion_pct > 10 AND t2_conversion_pct < 3 AND near_miss_t2_pct <= 20 
                THEN 'INCREASE_DISCOUNT'
            WHEN prev_tier_2_qty > prev_tier_1_qty * 2 AND t2_conversion_pct < 5 
                THEN 'REDUCE_RATIO'
            ELSE 'NO_CHANGE'
        END AS t2_action,
        
        CASE 
            WHEN t2_conversion_pct > 15 AND t2_margin_pct < target_margin * 100 * 0.80 
                THEN CEIL(prev_tier_2_qty * 1.20)
            WHEN t1_conversion_pct > 10 AND t2_conversion_pct < 3 AND near_miss_t2_pct > 20 
                THEN CEIL(prev_tier_2_qty * 0.9)
            WHEN prev_tier_2_qty > prev_tier_1_qty * 2 AND t2_conversion_pct < 5 
                THEN CEIL(prev_tier_1_qty * 1.7)
            ELSE prev_tier_2_qty
        END AS suggested_t2_qty,
        
        CASE 
            WHEN t2_margin_pct < target_margin * 100 * 0.70 
                THEN GREATEST(prev_tier_2_discount - 0.25, prev_tier_1_discount + 0.3)
            WHEN t1_conversion_pct > 10 AND t2_conversion_pct < 3 AND near_miss_t2_pct <= 20 
                THEN LEAST(prev_tier_2_discount + 0.25, 5.0)
            ELSE prev_tier_2_discount
        END AS suggested_t2_discount,
        
        -- TIER 3 RECOMMENDATIONS
        CASE 
            WHEN t3_conversion_pct > 5 AND t3_margin_pct < target_margin * 100 * 0.50 
                THEN 'INCREASE_QTY_OR_REDUCE_DISCOUNT'
            WHEN t2_conversion_pct > 5 AND t3_conversion_pct < 1 AND near_miss_t3_pct > 25 
                THEN 'DECREASE_QTY'
            WHEN t3_conversion_pct = 0 AND prev_tier_3_qty IS NOT NULL 
                THEN 'DECREASE_QTY_OR_INCREASE_DISCOUNT'
            ELSE 'NO_CHANGE'
        END AS t3_action,
        
        CASE 
            WHEN t3_conversion_pct > 5 AND t3_margin_pct < target_margin * 100 * 0.50 
                THEN CEIL(prev_tier_3_qty * 1.05)
            WHEN t2_conversion_pct > 5 AND t3_conversion_pct < 1 AND near_miss_t3_pct > 25 
                THEN CEIL(prev_tier_3_qty * 0.95)
            WHEN t3_conversion_pct = 0 AND prev_tier_3_qty IS NOT NULL 
                THEN CEIL(prev_tier_3_qty * 0.9)
            ELSE prev_tier_3_qty
        END AS suggested_t3_qty,
        
        CASE 
            WHEN t3_margin_pct < target_margin * 100 * 0.30 
                THEN GREATEST(prev_tier_3_discount - 0.25, prev_tier_2_discount + 0.3)
            WHEN t3_conversion_pct = 0 AND prev_tier_3_qty IS NOT NULL 
                THEN LEAST(prev_tier_3_discount + 0.25, 6.0)
            ELSE prev_tier_3_discount
        END AS suggested_t3_discount
        
    FROM with_targets
)

SELECT 
    warehouse_id,
    product_id,
    packing_unit_id,
    sku,
    brand,
    cat,
    total_orders,
    total_nmv,
    ROUND(target_margin * 100, 2) AS target_margin_pct,
    blended_margin_pct,
    margin_status,
    base_pct,
    t1_conversion_pct,
    t2_conversion_pct,
    t3_conversion_pct,
    near_miss_t1_pct,
    near_miss_t2_pct,
    near_miss_t3_pct,
    prev_tier_1_qty,
    prev_tier_1_discount,
    t1_action,
    suggested_t1_qty,
    suggested_t1_discount,
    prev_tier_2_qty,
    prev_tier_2_discount,
    t2_action,
    suggested_t2_qty,
    suggested_t2_discount,
    prev_tier_3_qty,
    prev_tier_3_discount,
    t3_action,
    suggested_t3_qty,
    suggested_t3_discount,
    CASE 
        WHEN t1_action != 'NO_CHANGE' OR t2_action != 'NO_CHANGE' OR t3_action != 'NO_CHANGE' 
        THEN TRUE ELSE FALSE 
    END AS has_recommendation
FROM feedback_recommendations
ORDER BY 
    CASE margin_status 
        WHEN 'CRITICAL' THEN 1 
        WHEN 'LOW' THEN 2 
        WHEN 'HIGH' THEN 3 
        ELSE 4 
    END,
    total_nmv DESC
'''

print("Fetching feedback from previous QD cycle...")
try:
    feedback_data = snowflake_query("Egypt", feedback_query)
    
    for col in feedback_data.columns:
        feedback_data[col] = pd.to_numeric(feedback_data[col], errors='ignore')
    
    if len(feedback_data) > 0:
        print(f"✓ Got feedback for {len(feedback_data)} SKUs")
        print(f"  - With recommendations: {feedback_data['has_recommendation'].sum()}")
        print(f"  - T1 changes: {(feedback_data['t1_action'] != 'NO_CHANGE').sum()}")
        print(f"  - T2 changes: {(feedback_data['t2_action'] != 'NO_CHANGE').sum()}")
        print(f"  - T3 changes: {(feedback_data['t3_action'] != 'NO_CHANGE').sum()}")
        print(f"  - Margin status: {feedback_data['margin_status'].value_counts().to_dict()}")
    else:
        print("⚠ No feedback data available (no previous cycle ended today)")
        feedback_data = pd.DataFrame()
except Exception as e:
    print(f"⚠ Could not fetch feedback data: {e}")
    feedback_data = pd.DataFrame()


Fetching feedback from previous QD cycle...
✓ Got feedback for 1582 SKUs
  - With recommendations: 1409
  - T1 changes: 580
  - T2 changes: 1036
  - T3 changes: 519
  - Margin status: {'CRITICAL': 892, 'HIGH': 312, 'HEALTHY': 272, 'LOW': 106}


In [13]:
# =============================================================================
# FEEDBACK LOOP CONFIGURATION
# =============================================================================

# Minimum elasticity ratio between tiers
# Elasticity = (discount_ratio) / (qty_ratio) >= 1.1
# This ensures discount increases proportionally more than quantity requirement
MIN_ELASTICITY_RATIO = 1.1

print("✓ Feedback loop configuration loaded")
print(f"  - Minimum elasticity ratio: {MIN_ELASTICITY_RATIO}")
print(f"  - Constraints: T1 qty < T2 qty < T3 qty")
print(f"  - Constraints: T1 disc < T2 disc < T3 disc")


✓ Feedback loop configuration loaded
  - Minimum elasticity ratio: 1.1
  - Constraints: T1 qty < T2 qty < T3 qty
  - Constraints: T1 disc < T2 disc < T3 disc


### Google Sheets Connection (Removed - No Longer Needed)


## 2. Product Selection

Select top-performing products per warehouse based on:
- Gross profit ranking (40% weight)
- Sales velocity ranking (25% weight)
- Order count ranking (20% weight)
- Retailer count ranking (15% weight)


In [14]:
query = f''' 
WITH rr AS (
    SELECT product_id, warehouse_id, rr
    FROM (
        SELECT *, 
               MAX(date) OVER (PARTITION BY product_id, warehouse_id) as max_date
        FROM finance.PREDICTED_RUNNING_RATES
        QUALIFY date = max_date
            AND date::date >= CURRENT_DATE - 14 
    )
),

stocks AS (
    SELECT 
        warehouse_id,
        product_id,
        SUM(stocks) as stocks,
        CASE 
            WHEN SUM(rr) > 0 THEN SUM(stocks) / SUM(rr) 
            ELSE SUM(stocks) 
        END as doh
    FROM (
        SELECT DISTINCT 
            product_warehouse.warehouse_id,
            product_warehouse.product_id,
            (product_warehouse.available_stock)::integer as stocks,
            COALESCE(rr.rr, 0) as rr 
        FROM product_warehouse
        JOIN products ON product_warehouse.product_id = products.id
        JOIN product_units ON products.unit_id = product_units.id
        LEFT JOIN rr ON rr.product_id = products.id 
            AND rr.warehouse_id = product_warehouse.warehouse_id
        WHERE product_warehouse.warehouse_id NOT IN (6, 9, 10)
            AND product_warehouse.is_basic_unit = 1
            AND product_warehouse.available_stock > 0 
    )
    GROUP BY warehouse_id, product_id
    HAVING doh >= 1
),

base AS (
    SELECT *, ROW_NUMBER() OVER (PARTITION BY retailer_id ORDER BY priority) as rnk 
    FROM (
        SELECT x.*, TAGGABLE_ID as retailer_id 
        FROM (
            SELECT id as cohort_id, name as cohort_name, priority, dynamic_tag_id 
            FROM cohorts 
            WHERE is_active = 'true'
                AND id IN (700,701,702,703,704,1123,1124,1125,1126)
        ) x 
        JOIN DYNAMIC_TAGgables dt ON x.dynamic_tag_id = dt.dynamic_tag_id
        WHERE dt.taggable_id not IN (
            SELECT taggable_id FROM DYNAMIC_TAGgables 
            WHERE dynamic_tag_id IN (2807, 2808, 2809, 2810, 2811, 2812)
        )
    )
    QUALIFY rnk = 1 
    ORDER BY cohort_id
),

warehouse_retailer_counts AS (
    SELECT 
        whs.warehouse_id,
        COUNT(DISTINCT base.retailer_id) as total_warehouse_retailers
    FROM base
    CROSS JOIN (SELECT DISTINCT warehouse_id FROM (VALUES
            (38), (1), (236), (962), (337), (8), (339), (170), 
            (501), (401), (703), (632), (797)
        ) x(warehouse_id)
    ) whs
    GROUP BY whs.warehouse_id
),

cohort_warehouse_map AS (
    SELECT cohort_id, warehouse_id
    FROM (VALUES
        (700, 38), (700, 1), (701, 236), (701, 962),
        (703, 337), (703, 8), (704, 339), (704, 170),
        (1124, 501), (1126, 401), (1123, 703), (1125, 632), (702, 797)
    ) x(cohort_id, warehouse_id)
),

cohort_prices AS (
    SELECT  
        cpu.cohort_id, pu.product_id, pu.packing_unit_id,
        pu.basic_unit_count, AVG(cpu.price) as price
    FROM cohort_product_packing_units cpu
    JOIN PACKING_UNIT_PRODUCTS pu ON pu.id = cpu.product_packing_unit_id
    WHERE cpu.cohort_id IN (700,701,702,703,704,1123,1124,1125,1126)
        AND cpu.created_at::date <> '2023-07-31'
        AND cpu.is_customized = true
    GROUP BY cpu.cohort_id, pu.product_id, pu.packing_unit_id, pu.basic_unit_count
),

live_cohort_prices AS (
select cohort_id, product_id, pu_id as packing_unit_id,
        buc as basic_unit_count, NEW_PRICE as price
from materialized_views.DBDP_PRICES
where created_at = current_date
and DATE_PART('hour',CONVERT_TIMEZONE('{zone_to_use}', 'Africa/Cairo', CURRENT_TIMEstamp())) BETWEEN SPLIT_PART(time_slot, '-', 1)::int AND (SPLIT_PART(time_slot, '-', 1)::int)+1
and cohort_id in (700,701,702,703,704,696,695,698,697,699,1123,1124,1125,1126)
),

combined_cohort_prices AS (
    SELECT *
    FROM (
        SELECT *, 1 AS priority FROM live_cohort_prices
        UNION ALL
        SELECT *, 2 AS priority FROM cohort_prices
    )
    QUALIFY ROW_NUMBER() OVER (PARTITION BY cohort_id, product_id, packing_unit_id ORDER BY priority) = 1
),

warehouse_prices AS (
    SELECT 
        cwm.warehouse_id, ccp.product_id, ccp.packing_unit_id,
        ccp.basic_unit_count, ccp.price
    FROM combined_cohort_prices ccp
    JOIN cohort_warehouse_map cwm ON cwm.cohort_id = ccp.cohort_id
    WHERE ccp.price IS NOT NULL
),

product_performance AS (
    SELECT 
        w.name as warehouse,
        w.id as warehouse_id,
        pso.product_id,
        pso.packing_unit_id,
        CONCAT(products.name_ar, ' ', products.size, ' ', product_units.name_ar) as sku,
        brands.name_ar as brand,
        categories.name_ar as category,
        COUNT(DISTINCT so.parent_sales_order_id) as total_orders,
        COUNT(DISTINCT so.retailer_id) as total_retailers,
        SUM(pso.purchased_item_count) as total_packing_units_sold,
        SUM(pso.purchased_item_count * pso.basic_unit_count) as total_basic_units_sold,
        SUM(pso.total_price) as total_nmv,
        SUM(COALESCE(f.wac_p, 0) * pso.purchased_item_count * pso.basic_unit_count) as total_cogs,
        (SUM(pso.total_price) - SUM(COALESCE(f.wac_p, 0) * pso.purchased_item_count * pso.basic_unit_count)) / 
            NULLIF(SUM(pso.total_price), 0) as blended_margin,
        AVG(pso.purchased_item_count) as avg_packing_units_per_order,
        SUM(pso.purchased_item_count) / 120.0 as packing_units_per_day,
        -- New metrics for qty potential
        STDDEV(pso.purchased_item_count) as order_qty_stddev,
        MAX(pso.purchased_item_count) as max_order_qty,
        COUNT(DISTINCT so.parent_sales_order_id) / NULLIF(COUNT(DISTINCT so.retailer_id), 0) as orders_per_retailer
    FROM product_sales_order pso
    JOIN sales_orders so ON so.id = pso.sales_order_id
    JOIN base ON base.retailer_id = so.retailer_id
    JOIN products ON products.id = pso.product_id
    JOIN brands ON products.brand_id = brands.id
    JOIN categories ON products.category_id = categories.id 
        AND categories.name_ar NOT LIKE '%سايب%'
    JOIN finance.all_cogs f ON f.product_id = pso.product_id
        AND f.from_date::date <= so.created_at::date
        AND f.to_date::date > so.created_at::date
    JOIN product_units ON product_units.id = products.unit_id
    JOIN warehouses w ON w.id = pso.warehouse_id
    WHERE so.created_at::date BETWEEN current_date - 60 AND CURRENT_DATE - 1
        AND so.sales_order_status_id NOT IN (7, 12)
        AND so.channel IN ('telesales', 'retailer')
        AND pso.purchased_item_count <> 0
        AND products.activation = 'true'
        and products.id <> 11794
        AND w.id NOT IN (6, 9, 10)
    GROUP BY ALL
),

-- Category benchmarks for qty potential comparison
category_benchmarks AS (
    SELECT 
        category,
        AVG(avg_packing_units_per_order) as category_avg_qty
    FROM product_performance
    GROUP BY category
),

product_performance_with_penetration AS (
    SELECT 
        pp.*,
        wrc.total_warehouse_retailers,
        (pp.total_retailers * 100.0 / NULLIF(wrc.total_warehouse_retailers, 0)) as retailer_penetration_pct,
        cb.category_avg_qty
    FROM product_performance pp
    LEFT JOIN warehouse_retailer_counts wrc ON wrc.warehouse_id = pp.warehouse_id
    LEFT JOIN category_benchmarks cb ON cb.category = pp.category
),

product_performance_with_price AS (
    SELECT 
        pp.*,
        COALESCE(wp.price, 0) as product_price,
        COALESCE(wp.basic_unit_count, 1) as basic_unit_count
    FROM product_performance_with_penetration pp
    LEFT JOIN warehouse_prices wp ON wp.warehouse_id = pp.warehouse_id
        AND wp.product_id = pp.product_id 
        AND wp.packing_unit_id = pp.packing_unit_id
),

qualified_products AS (
    SELECT 
        pp.warehouse,
        pp.warehouse_id,
        pp.product_id,
        pp.packing_unit_id,
        pp.sku,
        pp.brand,
        pp.category,
        pp.total_orders,
        pp.total_retailers,
        pp.total_packing_units_sold,
        pp.total_basic_units_sold,
        pp.total_nmv,
        pp.blended_margin,
        pp.avg_packing_units_per_order,
        pp.packing_units_per_day,
        pp.retailer_penetration_pct,
        pp.product_price,
        pp.basic_unit_count,
        pp.order_qty_stddev,
        pp.max_order_qty,
        pp.orders_per_retailer,
        pp.category_avg_qty,
        s.doh,
        s.stocks,
        (pp.total_nmv * pp.blended_margin) as gross_profit,
        
        -- Qty Potential Score (max 3.0, min 1.0)
        (
            -- Factor 1: Room to grow vs category (30%)
            (CASE 
                WHEN pp.avg_packing_units_per_order < pp.category_avg_qty * 0.7 THEN 3
                WHEN pp.avg_packing_units_per_order < pp.category_avg_qty THEN 2
                ELSE 1
            END) * 0.30
            
            -- Factor 2: Repeat purchase pattern (30%)
            + (CASE 
                WHEN pp.orders_per_retailer >= 3 THEN 3
                WHEN pp.orders_per_retailer >= 1.5 THEN 2
                ELSE 1
            END) * 0.30
            
            -- Factor 3: Current avg qty headroom (25%)
            + (CASE 
                WHEN pp.avg_packing_units_per_order < 2 THEN 3
                WHEN pp.avg_packing_units_per_order < 4 THEN 2
                ELSE 1
            END) * 0.25
            
            -- Factor 4: Order qty variance - flexibility (15%)
            + (CASE 
                WHEN pp.order_qty_stddev > pp.avg_packing_units_per_order * 0.5 THEN 3
                WHEN pp.order_qty_stddev > pp.avg_packing_units_per_order * 0.25 THEN 2
                ELSE 1
            END) * 0.15
        ) as qty_potential_score,
        
        ROW_NUMBER() OVER (PARTITION BY pp.warehouse_id ORDER BY (pp.total_nmv * pp.blended_margin) DESC) as gp_rank,
        ROW_NUMBER() OVER (PARTITION BY pp.warehouse_id ORDER BY pp.packing_units_per_day DESC) as velocity_rank,
        ROW_NUMBER() OVER (PARTITION BY pp.warehouse_id ORDER BY pp.total_orders DESC) as order_rank,
        ROW_NUMBER() OVER (PARTITION BY pp.warehouse_id ORDER BY pp.total_retailers DESC) as retailer_rank
    FROM product_performance_with_price pp
    JOIN stocks s ON s.product_id = pp.product_id 
        AND s.warehouse_id = pp.warehouse_id
),

top_products AS (
    SELECT 
        warehouse,
        warehouse_id,
        product_id,
        packing_unit_id,
        sku,
        brand,
        category,
        total_orders,
        total_retailers,
        total_packing_units_sold,
        total_basic_units_sold,
        ROUND(total_nmv, 2) as total_nmv,
        ROUND(blended_margin * 100, 2) as margin_pct,
        ROUND(avg_packing_units_per_order, 2) as avg_order_qty,
        ROUND(packing_units_per_day, 2) as units_per_day,
        ROUND(retailer_penetration_pct, 1) as retailer_penetration_pct,
        ROUND(gross_profit, 2) as gross_profit,
        ROUND(product_price, 2) as packing_unit_price,
        basic_unit_count,
        ROUND(product_price / NULLIF(basic_unit_count, 0), 2) as price_per_basic_unit,
        gp_rank,
        velocity_rank,
        order_rank,
        retailer_rank,
        ROUND(doh, 2) as days_on_hand,
        stocks as available_stock,
        (gp_rank * 0.25 + velocity_rank * 0.1 + order_rank * 0.3 + retailer_rank * 0.35) as combined_rank_score,
        
        -- Qty potential metrics
        ROUND(orders_per_retailer, 2) as orders_per_retailer,
        ROUND(order_qty_stddev, 2) as order_qty_stddev,
        max_order_qty,
        ROUND(category_avg_qty, 2) as category_avg_qty,
        ROUND(qty_potential_score, 2) as qty_potential_score,
        
        -- Brand rank
        ROW_NUMBER() OVER (
            PARTITION BY warehouse_id, brand 
            ORDER BY (gp_rank * 0.25 + velocity_rank * 0.1 + order_rank * 0.3 + retailer_rank * 0.35)
        ) as brand_rank
        
    FROM qualified_products
)

SELECT 
    warehouse,
    warehouse_id,
    product_id,
    packing_unit_id,
    sku,
    brand,
    category as cat,
    total_orders,
    total_retailers,
    total_packing_units_sold,
    total_basic_units_sold,
    total_nmv,
    margin_pct,
    avg_order_qty,
    units_per_day,
    retailer_penetration_pct,
    gross_profit,
    packing_unit_price,
    basic_unit_count,
    price_per_basic_unit,
    days_on_hand,
    available_stock,
    gp_rank as gross_profit_rank,
    velocity_rank,
    order_rank,
    retailer_rank,
    brand_rank,
    -- Qty potential columns
    orders_per_retailer,
    order_qty_stddev,
    max_order_qty,
    category_avg_qty,
    qty_potential_score,
    ROUND(combined_rank_score, 2) as combined_score,
    ROW_NUMBER() OVER (PARTITION BY warehouse ORDER BY combined_rank_score) as final_rank
FROM top_products
WHERE combined_rank_score <= 500
  AND brand_rank <= 5  -- Maximum 5 products per brand per warehouse
QUALIFY final_rank <= 500
ORDER BY warehouse, combined_rank_score
'''

# Execute query and convert numeric columns
print("Fetching product selection data...")
selected_products = snowflake_query("Egypt", query)

for col in selected_products.columns:
    selected_products[col] = pd.to_numeric(selected_products[col], errors='ignore')

print(f"✓ Retrieved {len(selected_products)} products from {selected_products['warehouse_id'].nunique()} warehouses")

Fetching product selection data...
✓ Retrieved 4205 products from 15 warehouses


In [15]:
# Remove excluded products
selected_products = selected_products[~selected_products['product_id'].isin(PRODUCTS_TO_REMOVE)]
selected_products = selected_products[~selected_products['cat'].isin(CATS_TO_REMOVE)]
selected_products = selected_products[~selected_products['brand'].isin(BRANDS_TO_REMOVE)]
print(f"✓ Selected {len(selected_products)} products after exclusions")

✓ Selected 4138 products after exclusions


## 3. Quantity Tier Calculation

Calculate tier 1 and tier 2 quantities based on:
- Order history from frequent buyers (2+ orders)
- Statistical analysis (median, Q3, P85, P90, P95)
- IQR outlier removal


In [16]:
selected_df = selected_products[['warehouse_id', 'product_id', 'packing_unit_id']].values.tolist()
tuples_string = ','.join([f"({int(wh_id)}, {int(prod_id)}, {int(pu_id)})" for wh_id, prod_id, pu_id in selected_df])
query = f'''
WITH selected_products AS (
    SELECT warehouse_id, product_id, packing_unit_id
    FROM (VALUES
      {tuples_string}
    ) AS x(warehouse_id, product_id, packing_unit_id)
),

-- Same base filtering as product selection query
-- Retailers in QD cohorts AND in specific dynamic tags
base AS (
    SELECT *, ROW_NUMBER() OVER (PARTITION BY retailer_id ORDER BY priority) as rnk 
    FROM (
        SELECT x.*, TAGGABLE_ID as retailer_id 
        FROM (
            SELECT id as cohort_id, name as cohort_name, priority, dynamic_tag_id 
            FROM cohorts 
            WHERE is_active = 'true'
                AND id IN (700,701,702,703,704,1123,1124,1125,1126)
        ) x 
        JOIN DYNAMIC_TAGgables dt ON x.dynamic_tag_id = dt.dynamic_tag_id
        WHERE dt.taggable_id not IN (
            SELECT taggable_id FROM DYNAMIC_TAGgables 
            WHERE dynamic_tag_id IN (2807, 2808, 2809, 2810, 2811, 2812)
        )
    )
    QUALIFY rnk = 1 
),

raw_order_quantities AS (
    SELECT 
        whs.wh as warehouse,
        whs.warehouse_id,
        pso.product_id,
        pso.packing_unit_id,
        CONCAT(products.name_ar, ' ', products.size, ' ', product_units.name_ar) as sku,
        brands.name_ar as brand,
        categories.name_ar as category,
        so.parent_sales_order_id,
        so.retailer_id,
        so.created_at::date as order_date,
        SUM(pso.purchased_item_count) as order_qty,
        SUM(pso.total_price) as order_value,
        -- ADD RECENCY WEIGHT: Recent orders get higher weight (exponential decay)
        EXP(-0.02 * DATEDIFF('day', so.created_at::date, CURRENT_DATE)) as recency_weight
        
    FROM product_sales_order pso
    JOIN sales_orders so ON so.id = pso.sales_order_id
    -- Filter to only include retailers from base (same cohorts + tags as product selection)
    JOIN base ON base.retailer_id = so.retailer_id
    JOIN products ON products.id = pso.product_id
    JOIN brands ON products.brand_id = brands.id
    JOIN categories ON products.category_id = categories.id
    JOIN product_units ON product_units.id = products.unit_id
    JOIN materialized_views.retailer_polygon ON materialized_views.retailer_polygon.retailer_id = so.retailer_id
    JOIN districts ON districts.id = materialized_views.retailer_polygon.district_id
    JOIN cities ON cities.id = districts.city_id
    JOIN states ON states.id = cities.state_id
    JOIN regions ON regions.id = states.region_id
    JOIN (SELECT * FROM (VALUES
            ('Cairo', 'El-Marg', 38),
            ('Cairo', 'Mostorod', 1),
            ('Giza', 'Barageel', 236),
            ('Giza', 'Sakkarah', 962),
            ('Delta West', 'El-Mahala', 337),
            ('Delta West', 'Tanta', 8),
            ('Delta East', 'Mansoura FC', 339),
            ('Delta East', 'Sharqya', 170),
            ('Upper Egypt', 'Assiut FC', 501),
            ('Upper Egypt', 'Bani sweif', 401),
            ('Upper Egypt', 'Menya Samalot', 703),
            ('Upper Egypt', 'Sohag', 632),
            ('Alexandria', 'Khorshed Alex', 797)
        ) x(region_name, wh, warehouse_id)
    ) whs ON whs.region_name = CASE WHEN regions.id = 2 THEN states.name_en ELSE regions.name_en END
    JOIN selected_products sp ON sp.warehouse_id = whs.warehouse_id 
        AND sp.product_id = pso.product_id
        AND sp.packing_unit_id = pso.packing_unit_id
    
    WHERE TRUE
        AND so.created_at::date BETWEEN DATE_TRUNC('month', CURRENT_DATE - INTERVAL '4 months') AND CURRENT_DATE - 1
        AND so.sales_order_status_id NOT IN (7, 12)
        AND so.channel IN ('telesales', 'retailer')
        AND pso.purchased_item_count <> 0
        AND products.activation = 'true'
    
    GROUP BY 
        whs.wh,
        whs.warehouse_id,
        pso.product_id,
        pso.packing_unit_id,
        products.name_ar,
        products.size,
        product_units.name_ar,
        brands.name_ar,
        categories.name_ar,
        so.parent_sales_order_id,
        so.retailer_id,
        so.created_at::date
),

retailer_frequency AS (
    SELECT 
        warehouse_id,
        product_id,
        packing_unit_id,
        retailer_id,
        COUNT(DISTINCT parent_sales_order_id) as order_count,
        COUNT(DISTINCT DATE_TRUNC('week', order_date)) as weeks_ordered,
        MIN(order_date) as first_order_date,
        MAX(order_date) as last_order_date,
        DATEDIFF('day', MIN(order_date), MAX(order_date)) as days_span,
        CASE 
            WHEN COUNT(DISTINCT parent_sales_order_id) > 1 
            THEN DATEDIFF('day', MIN(order_date), MAX(order_date)) / (COUNT(DISTINCT parent_sales_order_id) - 1)
            ELSE NULL 
        END as avg_days_between_orders
    FROM raw_order_quantities
    GROUP BY warehouse_id, product_id, packing_unit_id, retailer_id
),

frequent_buyers AS (
    SELECT 
        warehouse_id,
        product_id,
        packing_unit_id,
        retailer_id,
        order_count,
        weeks_ordered,
        avg_days_between_orders
    FROM retailer_frequency
    WHERE order_count >= 2 
       OR weeks_ordered >= 2
),

filtered_orders AS (
    SELECT roq.*
    FROM raw_order_quantities roq
    JOIN frequent_buyers fb 
        ON fb.warehouse_id = roq.warehouse_id
        AND fb.product_id = roq.product_id
        AND fb.packing_unit_id = roq.packing_unit_id
        AND fb.retailer_id = roq.retailer_id
),

initial_stats AS (
    SELECT 
        warehouse_id,
        product_id,
        packing_unit_id,
        PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY order_qty) as q1,
        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY order_qty) as q3,
        MEDIAN(order_qty) as median_qty,
        STDDEV_POP(order_qty) as stddev_qty,
        AVG(order_qty) as avg_qty
    FROM filtered_orders
    GROUP BY warehouse_id, product_id, packing_unit_id
),

cleaned_orders AS (
    SELECT fo.*
    FROM filtered_orders fo
    JOIN initial_stats ist 
        ON ist.warehouse_id = fo.warehouse_id
        AND ist.product_id = fo.product_id
        AND ist.packing_unit_id = fo.packing_unit_id
    WHERE TRUE
        AND fo.order_qty >= ist.q1 - 1.5 * (ist.q3 - ist.q1)
        AND fo.order_qty <= ist.q3 + 1.5 * (ist.q3 - ist.q1)
        AND (ist.stddev_qty = 0 
             OR ABS(fo.order_qty - ist.avg_qty) <= 3 * ist.stddev_qty)
),

-- MODIFIED: Recent orders stats (last 15 days)
recent_trends AS (
    SELECT 
        warehouse_id,
        product_id,
        packing_unit_id,
        -- Weighted average gives more importance to recent orders
        SUM(order_qty * recency_weight) / NULLIF(SUM(recency_weight), 0) as weighted_avg_qty,
        -- Last 15 days statistics
        AVG(CASE WHEN order_date >= CURRENT_DATE - 15 THEN order_qty END) as last_15d_avg,
        MEDIAN(CASE WHEN order_date >= CURRENT_DATE - 15 THEN order_qty END) as last_15d_median,
        MAX(CASE WHEN order_date >= CURRENT_DATE - 15 THEN order_qty END) as last_15d_max,
        COUNT(CASE WHEN order_date >= CURRENT_DATE - 15 THEN 1 END) as last_15d_orders
    FROM cleaned_orders
    GROUP BY warehouse_id, product_id, packing_unit_id
),

quantity_stats AS (
    SELECT 
        warehouse,
        warehouse_id,
        product_id,
        packing_unit_id,
        sku,
        brand,
        category,
        
        COUNT(DISTINCT parent_sales_order_id) as total_orders,
        COUNT(DISTINCT retailer_id) as total_retailers,
        
        MIN(order_qty) as min_qty,
        MAX(order_qty) as max_qty,
        AVG(order_qty) as avg_qty,
        MEDIAN(order_qty) as median_qty,
        STDDEV_POP(order_qty) as stddev_qty,
        
        PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY order_qty) as q1_qty,
        PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY order_qty) as q2_qty,
        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY order_qty) as q3_qty,
        PERCENTILE_CONT(0.85) WITHIN GROUP (ORDER BY order_qty) as p85_qty,
        PERCENTILE_CONT(0.90) WITHIN GROUP (ORDER BY order_qty) as p90_qty,
        PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY order_qty) as p95_qty,
        
        SUM(order_value) as total_revenue,
        AVG(order_value) as avg_order_value
        
    FROM cleaned_orders
    GROUP BY 
        warehouse,
        warehouse_id,
        product_id,
        packing_unit_id,
        sku,
        brand,
        category
),

frequency_table AS (
    SELECT
        warehouse_id,
        product_id,
        packing_unit_id,
        order_qty,
        COUNT(DISTINCT parent_sales_order_id) AS freq
    FROM cleaned_orders
    GROUP BY warehouse_id, product_id, packing_unit_id, order_qty
),

lag_lead AS (
    SELECT
        warehouse_id,
        product_id,
        packing_unit_id,
        order_qty,
        freq,
        LAG(freq) OVER (PARTITION BY warehouse_id, product_id, packing_unit_id ORDER BY order_qty) AS prev_freq,
        LEAD(freq) OVER (PARTITION BY warehouse_id, product_id, packing_unit_id ORDER BY order_qty) AS next_freq
    FROM frequency_table
),

most_frequent_qty AS (
    SELECT 
        warehouse_id,
        product_id,
        packing_unit_id,
        order_qty as mode_qty,
        freq as mode_freq,
        freq * 1.0 / SUM(freq) OVER (PARTITION BY warehouse_id, product_id, packing_unit_id) as mode_contribution
    FROM (
        SELECT *,
               ROW_NUMBER() OVER (PARTITION BY warehouse_id, product_id, packing_unit_id ORDER BY freq DESC, order_qty DESC) as rn
        FROM lag_lead
        WHERE (freq > COALESCE(prev_freq, -1))
          AND (freq > COALESCE(next_freq, -1))
    )
    WHERE rn = 1
),

frequency_metrics AS (
    SELECT 
        fb.warehouse_id,
        fb.product_id,
        fb.packing_unit_id,
        COUNT(DISTINCT fb.retailer_id) as frequent_retailer_count,
        AVG(fb.order_count) as avg_orders_per_retailer,
        AVG(fb.avg_days_between_orders) as avg_refill_days,
        MEDIAN(fb.avg_days_between_orders) as median_refill_days
    FROM frequent_buyers fb
    GROUP BY fb.warehouse_id, fb.product_id, fb.packing_unit_id
),

tier_calculations AS (
    SELECT 
        qs.*,
        COALESCE(mf.mode_qty, qs.median_qty) as mode_qty,
        COALESCE(mf.mode_freq, 0) as mode_freq,
        COALESCE(mf.mode_contribution, 0) as mode_contribution,
        COALESCE(fm.frequent_retailer_count, 0) as frequent_retailer_count,
        COALESCE(fm.avg_orders_per_retailer, 0) as avg_orders_per_retailer,
        COALESCE(fm.avg_refill_days, 0) as avg_refill_days,
        COALESCE(fm.median_refill_days, 0) as median_refill_days,
        
        -- ADD: Recency metrics
        rt.weighted_avg_qty,
        rt.last_15d_avg,
        rt.last_15d_median,
        rt.last_15d_max,
        rt.last_15d_orders,
        
        -- MODIFIED: Tier 1 with 15-day recency factor
        -- Blends historical median with recent trends (70% historical, 30% recent)
        CEIL(GREATEST(
            (0.7 * qs.median_qty + 0.3 * COALESCE(rt.weighted_avg_qty, qs.median_qty)) + 1.0 * COALESCE(qs.stddev_qty, 1),
            qs.q3_qty,
            COALESCE(mf.mode_qty, qs.median_qty) + GREATEST(3, qs.median_qty * 0.3),
            -- If recent 15 days show growth, adjust upward
            CASE 
                WHEN rt.last_15d_orders >= 2 AND rt.last_15d_median > qs.median_qty 
                THEN rt.last_15d_median * 1.2
                ELSE qs.median_qty * 1.3
            END,
            qs.median_qty + 2
        )) as tier_1_qty,
        
        -- MODIFIED: Tier 2 with 15-day recency factor
        CEIL(GREATEST(
            qs.q3_qty + 1.5 * COALESCE(qs.stddev_qty, 1),
            qs.p85_qty + 1.0 * COALESCE(qs.stddev_qty, 1),
            qs.p90_qty + 0.5 * COALESCE(qs.stddev_qty, 1),
            qs.p95_qty,
            -- Blend historical and weighted average
            (0.6 * qs.median_qty + 0.4 * COALESCE(rt.weighted_avg_qty, qs.median_qty)) * 2.0,
            -- If last 15 days show higher demand, adjust tier 2 upward
            CASE 
                WHEN rt.last_15d_orders >= 2 AND rt.last_15d_max > qs.p90_qty 
                THEN rt.last_15d_max * 1.1
                ELSE qs.median_qty * 1.6
            END,
            tier_1_qty*1.3
        )) as tier_2_qty_base
        
    FROM quantity_stats qs
    LEFT JOIN most_frequent_qty mf 
        ON mf.warehouse_id = qs.warehouse_id 
        AND mf.product_id = qs.product_id
        AND mf.packing_unit_id = qs.packing_unit_id
    LEFT JOIN frequency_metrics fm
        ON fm.warehouse_id = qs.warehouse_id
        AND fm.product_id = qs.product_id
        AND fm.packing_unit_id = qs.packing_unit_id
    LEFT JOIN recent_trends rt
        ON rt.warehouse_id = qs.warehouse_id
        AND rt.product_id = qs.product_id
        AND rt.packing_unit_id = qs.packing_unit_id
),

tier_adjustments AS (
    SELECT 
        warehouse,
        warehouse_id,
        product_id,
        packing_unit_id,
        sku,
        brand,
        category,
        total_orders,
        total_retailers,
        min_qty,
        avg_qty,
        median_qty,
        stddev_qty,
        q1_qty,
        q3_qty,
        p85_qty,
        p90_qty,
        p95_qty,
        max_qty,
        mode_qty,
        mode_freq,
        mode_contribution,
        frequent_retailer_count,
        avg_orders_per_retailer,
        avg_refill_days,
        median_refill_days,
        total_revenue,
        avg_order_value,
        
        -- ADD: Recency metrics to output
        weighted_avg_qty,
        last_15d_avg,
        last_15d_median,
        last_15d_max,
        last_15d_orders,
        
        tier_1_qty,
        LEAST(
            CEIL(GREATEST(
                tier_2_qty_base,
                tier_1_qty * 1.6
            )),
            GREATEST(
                tier_1_qty * 3.5,
                tier_1_qty + 20
            )
        ) as tier_2_qty
        
    FROM tier_calculations
),

retailer_distribution AS (
    SELECT 
        co.warehouse_id,
        co.product_id,
        co.packing_unit_id,
        ta.tier_1_qty,
        ta.tier_2_qty,
        COUNT(DISTINCT CASE 
            WHEN co.order_qty < ta.tier_1_qty THEN co.retailer_id 
        END) as retailers_below_t1,
        COUNT(DISTINCT CASE 
            WHEN co.order_qty >= ta.tier_1_qty AND co.order_qty < ta.tier_2_qty THEN co.retailer_id 
        END) as retailers_at_t1,
        COUNT(DISTINCT CASE 
            WHEN co.order_qty >= ta.tier_2_qty THEN co.retailer_id 
        END) as retailers_at_t2,
        COUNT(CASE 
            WHEN co.order_qty < ta.tier_1_qty THEN 1 
        END) as orders_below_t1,
        COUNT(CASE 
            WHEN co.order_qty >= ta.tier_1_qty AND co.order_qty < ta.tier_2_qty THEN 1 
        END) as orders_at_t1,
        COUNT(CASE 
            WHEN co.order_qty >= ta.tier_2_qty THEN 1 
        END) as orders_at_t2
    FROM cleaned_orders co
    JOIN tier_adjustments ta 
        ON ta.warehouse_id = co.warehouse_id 
        AND ta.product_id = co.product_id
        AND ta.packing_unit_id = co.packing_unit_id
    GROUP BY 
        co.warehouse_id,
        co.product_id,
        co.packing_unit_id,
        ta.tier_1_qty,
        ta.tier_2_qty
)

SELECT 
    ta.warehouse,
    ta.warehouse_id,
    ta.product_id,
    ta.packing_unit_id,
    ta.sku,
    ta.brand,
    ta.category,
    
    ta.frequent_retailer_count,
    ROUND(ta.avg_orders_per_retailer, 2) as avg_orders_per_retailer,
    ROUND(ta.avg_refill_days, 1) as avg_refill_days,
    ROUND(ta.median_refill_days, 1) as median_refill_days,
    
    ta.total_orders,
    ta.total_retailers,
    
    ta.min_qty,
    ROUND(ta.avg_qty, 2) as avg_qty,
    ta.median_qty,
    ROUND(ta.weighted_avg_qty, 2) as weighted_avg_qty,
    ta.q1_qty as q1_25_qty,
    ta.q3_qty as q3_75_qty,
    ta.p85_qty,
    ta.p90_qty,
    ta.p95_qty,
    ta.max_qty,
    ROUND(ta.stddev_qty, 2) as stddev_qty,
    ta.mode_qty,
    ta.mode_freq,
    ROUND(ta.mode_contribution * 100, 1) as mode_pct,
    
    -- MODIFIED: 15-day trend metrics
    ROUND(ta.last_15d_avg, 2) as last_15d_avg,
    ta.last_15d_median,
    ta.last_15d_max,
    ta.last_15d_orders,
    
    ta.tier_1_qty,
    ta.tier_2_qty,
    ROUND((ta.tier_1_qty - ta.median_qty) * 100.0 / NULLIF(ta.median_qty, 0), 1) as tier_1_increase_pct,
    ROUND((ta.tier_2_qty - ta.median_qty) * 100.0 / NULLIF(ta.median_qty, 0), 1) as tier_2_increase_pct,
    ROUND(ta.tier_2_qty * 1.0 / NULLIF(ta.tier_1_qty, 0), 2) as tier_2_to_tier_1_ratio,
    
    rd.retailers_below_t1,
    rd.retailers_at_t1,
    rd.retailers_at_t2,
    
    rd.orders_below_t1,
    rd.orders_at_t1,
    rd.orders_at_t2,
    
    ROUND(100.0 * rd.retailers_below_t1 / NULLIF(ta.total_retailers, 0), 1) as pct_retailers_below_t1,
    ROUND(100.0 * rd.retailers_at_t1 / NULLIF(ta.total_retailers, 0), 1) as pct_retailers_at_t1,
    ROUND(100.0 * rd.retailers_at_t2 / NULLIF(ta.total_retailers, 0), 1) as pct_retailers_at_t2,
    
    ROUND(100.0 * rd.orders_below_t1 / NULLIF(ta.total_orders, 0), 1) as pct_orders_below_t1,
    ROUND(100.0 * rd.orders_at_t1 / NULLIF(ta.total_orders, 0), 1) as pct_orders_at_t1,
    ROUND(100.0 * rd.orders_at_t2 / NULLIF(ta.total_orders, 0), 1) as pct_orders_at_t2,
    
    ROUND(ta.total_revenue, 2) as total_revenue,
    ROUND(ta.avg_order_value, 2) as avg_order_value

FROM tier_adjustments ta
JOIN retailer_distribution rd 
    ON rd.warehouse_id = ta.warehouse_id 
    AND rd.product_id = ta.product_id
    AND rd.packing_unit_id = ta.packing_unit_id
ORDER BY ta.warehouse, ta.total_orders DESC
'''

# Execute query and convert numeric columns
print("Fetching quantity tier data...")
tiers_selection = snowflake_query("Egypt", query)

for col in tiers_selection.columns:
    tiers_selection[col] = pd.to_numeric(tiers_selection[col], errors='ignore')

print(f"✓ Calculated tiers for {len(tiers_selection)} product-warehouse combinations")


Fetching quantity tier data...
✓ Calculated tiers for 3955 product-warehouse combinations


In [17]:
# =============================================================================
# APPLY FEEDBACK ADJUSTMENTS TO TIER QUANTITIES
# =============================================================================

# Store T3 (wholesale) feedback for later use (applied after ws_new calculation)
feedback_t3_data = pd.DataFrame()

if len(feedback_data) > 0:
    print("\n" + "="*80)
    print("APPLYING FEEDBACK LOOP ADJUSTMENTS")
    print("="*80)
    
    # Store T3 feedback for later (will be applied after ws_new_qty is calculated)
    t3_cols = ['warehouse_id', 'product_id', 'packing_unit_id', 
               'suggested_t3_qty', 'suggested_t3_discount', 't3_action']
    available_t3_cols = [c for c in t3_cols if c in feedback_data.columns]
    if len(available_t3_cols) > 3:  # Has T3 suggestions
        feedback_t3_data = feedback_data[available_t3_cols].copy()
        print(f"  T3 (wholesale) feedback stored for later: {feedback_t3_data['suggested_t3_qty'].notna().sum()} SKUs")
    
    # Merge feedback with tiers_selection
    tiers_with_feedback = tiers_selection.merge(
        feedback_data[['warehouse_id', 'product_id', 'packing_unit_id', 
                       'suggested_t1_qty', 'suggested_t2_qty', 
                       't1_action', 't2_action', 'margin_status']],
        on=['warehouse_id', 'product_id', 'packing_unit_id'],
        how='left'
    )
    
    print(f"\n  SKUs with feedback data: {tiers_with_feedback['suggested_t1_qty'].notna().sum()} / {len(tiers_with_feedback)}")
    
    # ==========================================================================
    # SMART T1 QUANTITY ADJUSTMENTS
    # Only apply if recommendation actually improves the current value
    # ==========================================================================
    mask_t1 = tiers_with_feedback['suggested_t1_qty'].notna()
    t1_applied = 0
    t1_skipped_already_better = 0
    
    if mask_t1.sum() > 0:
        original_t1 = tiers_with_feedback.loc[mask_t1, 'tier_1_qty'].copy()
        suggested_t1 = tiers_with_feedback.loc[mask_t1, 'suggested_t1_qty']
        t1_action = tiers_with_feedback.loc[mask_t1, 't1_action'].fillna('')
        
        # Determine which rows should be updated
        # Skip if: action is INCREASE and script already >= suggested
        # Skip if: action is DECREASE and script already <= suggested
        skip_t1_increase = t1_action.str.contains('INCREASE', case=False, na=False) & (original_t1 >= suggested_t1)
        skip_t1_decrease = t1_action.str.contains('DECREASE|REDUCE', case=False, na=False) & (original_t1 <= suggested_t1)
        skip_t1 = skip_t1_increase | skip_t1_decrease
        
        # Apply only where NOT skipped
        apply_t1 = mask_t1.copy()
        apply_t1.loc[mask_t1] = ~skip_t1.values
        
        if apply_t1.sum() > 0:
            tiers_with_feedback.loc[apply_t1, 'tier_1_qty'] = tiers_with_feedback.loc[apply_t1, 'suggested_t1_qty'].astype(int)
            avg_change = ((tiers_with_feedback.loc[apply_t1, 'tier_1_qty'] - original_t1.loc[~skip_t1]) / original_t1.loc[~skip_t1] * 100).mean()
            t1_applied = apply_t1.sum()
            print(f"  ✓ Applied T1 qty adjustments: {t1_applied} SKUs (avg change: {avg_change:+.1f}%)")
        
        t1_skipped_already_better = skip_t1.sum()
        if t1_skipped_already_better > 0:
            print(f"  ⚠ Skipped T1 qty for {t1_skipped_already_better} SKUs (script already better)")
    
    # ==========================================================================
    # SMART T2 QUANTITY ADJUSTMENTS
    # Only apply if recommendation actually improves the current value
    # ==========================================================================
    mask_t2 = tiers_with_feedback['suggested_t2_qty'].notna()
    t2_applied = 0
    t2_skipped_already_better = 0
    
    if mask_t2.sum() > 0:
        original_t2 = tiers_with_feedback.loc[mask_t2, 'tier_2_qty'].copy()
        suggested_t2 = tiers_with_feedback.loc[mask_t2, 'suggested_t2_qty']
        t2_action = tiers_with_feedback.loc[mask_t2, 't2_action'].fillna('')
        
        # Determine which rows should be updated
        skip_t2_increase = t2_action.str.contains('INCREASE', case=False, na=False) & (original_t2 >= suggested_t2)
        skip_t2_decrease = t2_action.str.contains('DECREASE|REDUCE', case=False, na=False) & (original_t2 <= suggested_t2)
        skip_t2 = skip_t2_increase | skip_t2_decrease
        
        # Apply only where NOT skipped
        apply_t2 = mask_t2.copy()
        apply_t2.loc[mask_t2] = ~skip_t2.values
        
        if apply_t2.sum() > 0:
            tiers_with_feedback.loc[apply_t2, 'tier_2_qty'] = tiers_with_feedback.loc[apply_t2, 'suggested_t2_qty'].astype(int)
            avg_change = ((tiers_with_feedback.loc[apply_t2, 'tier_2_qty'] - original_t2.loc[~skip_t2]) / original_t2.loc[~skip_t2] * 100).mean()
            t2_applied = apply_t2.sum()
            print(f"  ✓ Applied T2 qty adjustments: {t2_applied} SKUs (avg change: {avg_change:+.1f}%)")
        
        t2_skipped_already_better = skip_t2.sum()
        if t2_skipped_already_better > 0:
            print(f"  ⚠ Skipped T2 qty for {t2_skipped_already_better} SKUs (script already better)")
    
    # ==========================================================================
    # VALIDATE TIER QUANTITY CONSTRAINTS: T1 qty < T2 qty
    # ==========================================================================
    print("\n  Validating tier constraints...")
    
    invalid_qty = tiers_with_feedback['tier_2_qty'] <= tiers_with_feedback['tier_1_qty']
    if invalid_qty.sum() > 0:
        # Fix: Set T2 qty = T1 qty * 1.5
        tiers_with_feedback.loc[invalid_qty, 'tier_2_qty'] = (
            tiers_with_feedback.loc[invalid_qty, 'tier_1_qty'] * 1.5
        ).astype(int)
        print(f"    Fixed {invalid_qty.sum()} SKUs where T2 qty <= T1 qty")
    
    # Update tiers_selection with adjusted values
    tiers_selection = tiers_with_feedback.drop(
        columns=['suggested_t1_qty', 'suggested_t2_qty', 't1_action', 't2_action', 'margin_status'], 
        errors='ignore'
    )
    
    # Summary by action
    if 't1_action' in tiers_with_feedback.columns:
        t1_actions = tiers_with_feedback[tiers_with_feedback['t1_action'].notna()]['t1_action'].value_counts()
        print(f"\n  T1 Actions applied:")
        for action, count in t1_actions.items():
            if action != 'NO_CHANGE':
                print(f"    - {action}: {count}")
    
    if 't2_action' in tiers_with_feedback.columns:
        t2_actions = tiers_with_feedback[tiers_with_feedback['t2_action'].notna()]['t2_action'].value_counts()
        print(f"\n  T2 Actions applied:")
        for action, count in t2_actions.items():
            if action != 'NO_CHANGE':
                print(f"    - {action}: {count}")
    
    if 'margin_status' in tiers_with_feedback.columns:
        margin_dist = tiers_with_feedback[tiers_with_feedback['margin_status'].notna()]['margin_status'].value_counts()
        print(f"\n  Margin Status of adjusted SKUs:")
        for status, count in margin_dist.items():
            print(f"    - {status}: {count}")
    
    print("\n" + "="*80)
else:
    print("⚠ No feedback data available - using original tier calculations")


APPLYING FEEDBACK LOOP ADJUSTMENTS
  T3 (wholesale) feedback stored for later: 521 SKUs

  SKUs with feedback data: 1394 / 3955
  ✓ Applied T1 qty adjustments: 1269 SKUs (avg change: +16.0%)
  ⚠ Skipped T1 qty for 125 SKUs (script already better)
  ✓ Applied T2 qty adjustments: 1130 SKUs (avg change: +22.2%)
  ⚠ Skipped T2 qty for 264 SKUs (script already better)

  Validating tier constraints...
    Fixed 2 SKUs where T2 qty <= T1 qty

  T1 Actions applied:
    - INCREASE_QTY: 254
    - SLIGHT_INCREASE_QTY: 135
    - INCREASE_DISCOUNT: 129
    - DECREASE_QTY: 8

  T2 Actions applied:
    - INCREASE_QTY: 639
    - INCREASE_DISCOUNT: 250
    - DECREASE_QTY: 20
    - REDUCE_RATIO: 3

  Margin Status of adjusted SKUs:
    - CRITICAL: 789
    - HIGH: 278
    - HEALTHY: 231
    - LOW: 96



### SKU Information & Cost Data


In [18]:
query = f'''
SELECT DISTINCT  
    products.id as product_id,
    CONCAT(products.name_ar, ' ', products.size, ' ', product_units.name_ar) as sku,
    brands.name_ar as brand, 
    categories.name_ar as cat,
    f.wac_p
FROM products 
JOIN brands ON products.brand_id = brands.id 
JOIN categories ON products.category_id = categories.id
JOIN finance.all_cogs f ON f.product_id = products.id 
    AND CONVERT_TIMEZONE('{zone_to_use}', 'Africa/Cairo', CURRENT_TIMESTAMP()) 
        BETWEEN f.from_date AND f.to_date 
JOIN product_units ON product_units.id = products.unit_id 
'''

print("Fetching SKU information and WAC data...")
sku_info = snowflake_query("Egypt", query)
sku_info['product_id'] = pd.to_numeric(sku_info['product_id'])
sku_info['wac_p'] = pd.to_numeric(sku_info['wac_p'])

print(f"✓ Retrieved cost data for {len(sku_info)} SKUs")

Fetching SKU information and WAC data...
✓ Retrieved cost data for 8147 SKUs


## 4. Market Prices

Gather competitive pricing data from multiple sources:
- **Marketplace prices** - Regional marketplace data with fallbacks
- **Ben Soliman prices** - Competitor pricing
- **Scraped prices** - Web-scraped competitor data
- **Product statistics** - Historical margin boundaries

### 4.1 Marketplace Prices


In [19]:
query = f'''
WITH whs as (SELECT *
             FROM   (values
                            ('Cairo', 'El-Marg', 38,700),
                            ('Cairo', 'Mostorod', 1,700),
                            ('Giza', 'Barageel', 236,701),
                            ('Delta West', 'El-Mahala', 337,703),
                            ('Delta West', 'Tanta', 8,703),
                            ('Delta East', 'Mansoura FC', 339,704),
                            ('Delta East', 'Sharqya', 170,704),
                            ('Upper Egypt', 'Assiut FC', 501,1124),
                            ('Upper Egypt', 'Bani sweif', 401,1126),
                            ('Upper Egypt', 'Menya Samalot', 703,1123),
                            ('Upper Egypt', 'Sohag', 632,1125),
                            ('Alexandria', 'Khorshed Alex', 797,702),
							('Giza', 'Sakkarah', 962,701)
							
							)
                    x(region, wh, warehouse_id,cohort_id)),
full_data as (
select products.id as product_id, region,warehouse_id
from products , whs 
where activation = 'true'
),				

MP as (
select region,product_id,
min(min_price) as min_price,
min(max_price) as max_price,
min(mod_price) as mod_price,
min(true_min) as true_min,
min(true_max) as true_max

from (
select mp.region,mp.product_id,mp.pu_id,
min_price/BASIC_UNIT_COUNT as min_price,
max_price/BASIC_UNIT_COUNT as max_price,
mod_price/BASIC_UNIT_COUNT as mod_price,
TRUE_MIN_PRICE/BASIC_UNIT_COUNT as true_min,
TRUE_MAX_PRICE/BASIC_UNIT_COUNT as true_max
from materialized_views.marketplace_prices mp 
join packing_unit_products pup on pup.product_id = mp.product_id and pup.packing_unit_id = mp.pu_id
join finance.all_cogs f on f.product_id = mp.product_id and CURRENT_TIMESTAMP between f.from_date and f.to_date
where  least(min_price,mod_price) between wac_p*0.9 and wac_p*1.3 
)
group by all 
),
region_mapping AS (
    SELECT * 
	FROM 
	(	VALUES
        ('Delta East', 'Delta West'),
        ('Delta West', 'Delta East'),
        ('Alexandria', 'Cairo'),
        ('Alexandria', 'Giza'),
        ('Upper Egypt', 'Cairo'),
        ('Upper Egypt', 'Giza'),
		('Cairo','Giza'),
		('Giza','Cairo'),
		('Delta West', 'Cairo'),
		('Delta East', 'Cairo'),
		('Delta West', 'Giza'),
		('Delta East', 'Giza')
		)
    AS region_mapping(region, fallback_region)
)


select region,warehouse_id,product_id,
min(final_min_price) as final_min_price,
min(final_max_price) as final_max_price,
min(final_mod_price) as final_mod_price,
min(final_true_min) as final_true_min,
min(final_true_max) as final_true_max

from (
SELECT
distinct 
	w.region,
    w.warehouse_id,
	w.product_id,
    COALESCE(m1.min_price, m2.min_price) AS final_min_price,
    COALESCE(m1.max_price, m2.max_price) AS final_max_price,
    COALESCE(m1.mod_price, m2.mod_price) AS final_mod_price,
	COALESCE(m1.true_min, m2.true_min) AS final_true_min,
	COALESCE(m1.true_max, m2.true_max) AS final_true_max,
FROM full_data w
LEFT JOIN MP m1
    ON w.region = m1.region and w.product_id = m1.product_id
JOIN region_mapping rm
    ON w.region = rm.region
LEFT JOIN MP m2
    ON rm.fallback_region = m2.region
   AND w.product_id = m2.product_id
)
where final_min_price is not null 
group by all 
'''

print("Fetching marketplace prices...")
marketplace = snowflake_query("Egypt", query)
marketplace.columns = marketplace.columns.str.lower()

for col in marketplace.columns:
    marketplace[col] = pd.to_numeric(marketplace[col], errors='ignore')

print(f"✓ Retrieved marketplace prices for {len(marketplace)} products")

Fetching marketplace prices...
✓ Retrieved marketplace prices for 24853 products


### 4.2 Ben Soliman (Competitor) Prices


In [20]:
query = f'''
with lower as (
select distinct product_id,sku,new_d*bs_price as ben_soliman_price,INJECTION_DATE
from (
select maxab_product_id as product_id,maxab_sku as sku,INJECTION_DATE,wac1,wac_p,(bs_price/bs_unit_count) as bs_price,diff,cu_price,case when p1 > 1 then child_quantity else 0 end as scheck,round(p1/2)*2 as p1,p2,case when (ROUND(p1 / scheck) * scheck) = 0 then p1 else (ROUND(p1 / scheck) * scheck) end as new_d
from (
select sm.*,wac1, wac_p, abs((bs_price/bs_unit_count)-(wac_p*maxab_basic_unit_count))/(wac_p*maxab_basic_unit_count) as diff,cpc.price as cu_price,pup.child_quantity , round((cu_price/(bs_price/bs_unit_count))) as p1, round(((bs_price/bs_unit_count)/cu_price)) as p2
from materialized_views.savvy_mapping sm 
join finance.all_cogs f on f.product_id = sm.maxab_product_id and CONVERT_TIMEZONE('{zone_to_use}', 'Africa/Cairo', CURRENT_TIMESTAMP()) between f.from_Date and f.to_date
join   PACKING_UNIT_PRODUCTS pu on pu.product_id = sm.maxab_product_id and pu.IS_BASIC_UNIT = 1 
join cohort_product_packing_units cpc on cpc.PRODUCT_PACKING_UNIT_ID = pu.id and cohort_id = 700 
join packing_unit_products pup on pup.product_id = sm.maxab_product_id and pup.is_basic_unit = 1  
where bs_price is not null and INJECTION_DATE::date >= CONVERT_TIMEZONE('{zone_to_use}', 'Africa/Cairo', CURRENT_TIMESTAMP())::date - 5 
and diff > 0.3
and p1 > 1
)
)
qualify max(INJECTION_DATE)over(partition by product_id)  = INJECTION_DATE
),
m_bs as (
select z.* from (
	select maxab_product_id as product_id, maxab_sku as sku, avg(bs_final_price) as ben_soliman_price,INJECTION_DATE
	from (
		select *, row_number() over(partition by maxab_product_id order by diff) as rnk_2 from (
			select *, (bs_final_price-wac_p)/wac_p as diff_2 from (
				select *, bs_price/maxab_basic_unit_count as bs_final_price from (
					select *, row_number() over(partition by maxab_product_id, maxab_pu order by diff) as rnk from (
						select * ,max(INJECTION_DATE::date) over(partition by maxab_product_id, maxab_pu) as max_date,
						from (
							select sm.*,wac1, wac_p, abs(bs_price-(wac_p*maxab_basic_unit_count))/(wac_p*maxab_basic_unit_count) as diff 
					from materialized_views.savvy_mapping sm 
					join finance.all_cogs f on f.product_id = sm.maxab_product_id and CONVERT_TIMEZONE('{zone_to_use}', 'Africa/Cairo', CURRENT_TIMESTAMP()) between f.from_Date and f.to_date
					where bs_price is not null and INJECTION_DATE::date >= CONVERT_TIMEZONE('{zone_to_use}', 'Africa/Cairo', CURRENT_TIMESTAMP())::date - 5 
					and diff < 0.3
					)
					qualify max_date = INJECTION_DATE
					) qualify rnk = 1 
				)
			) where diff_2 between -0.5 and 0.5 
		) qualify rnk_2 = 1 
	) group by all
) z 
join finance.all_cogs f on f.product_id = z.product_id and CONVERT_TIMEZONE('{zone_to_use}', 'Africa/Cairo', CURRENT_TIMESTAMP()) between f.from_Date and f.to_date
where ben_soliman_price between f.wac_p*0.7 and f.wac_p*1.3
)
select product_id,avg(ben_soliman_price) as ben_soliman_price
from (
select *
from (
select * 
from m_bs 

union all

 select *
 from lower
 )
 qualify max(INJECTION_DATE) over(partition by product_id) = INJECTION_DATE
 )
 group by all
'''

print("Fetching Ben Soliman (competitor) prices...")
bensoliman = snowflake_query("Egypt", query)
bensoliman.columns = bensoliman.columns.str.lower()

for col in bensoliman.columns:
    bensoliman[col] = pd.to_numeric(bensoliman[col], errors='ignore')

print(f"✓ Retrieved competitor prices for {len(bensoliman)} products")

Fetching Ben Soliman (competitor) prices...
✓ Retrieved competitor prices for 1566 products


### 4.3 Scraped Competitor Prices


In [21]:
query = f'''
WITH whs as (SELECT *
             FROM   (values
                            ('Cairo', 'El-Marg', 38,700),
                            ('Cairo', 'Mostorod', 1,700),
                            ('Giza', 'Barageel', 236,701),
                            ('Delta West', 'El-Mahala', 337,703),
                            ('Delta West', 'Tanta', 8,703),
                            ('Delta East', 'Mansoura FC', 339,704),
                            ('Delta East', 'Sharqya', 170,704),
                            ('Upper Egypt', 'Assiut FC', 501,1124),
                            ('Upper Egypt', 'Bani sweif', 401,1126),
                            ('Upper Egypt', 'Menya Samalot', 703,1123),
                            ('Upper Egypt', 'Sohag', 632,1125),
                            ('Alexandria', 'Khorshed Alex', 797,702),
							('Giza', 'Sakkarah', 962,701)
							
							)
                    x(region, wh, warehouse_id,cohort_id))
select product_id,x.region,warehouse_id,min(MARKET_PRICE) as min_scrapped,max(MARKET_PRICE) as max_scrapped,median(MARKET_PRICE) as median_scrapped
from (
select MATERIALIZED_VIEWS.CLEANED_MARKET_PRICES.*,max(date)over(partition by region,MATERIALIZED_VIEWS.CLEANED_MARKET_PRICES.product_id,competitor) as max_date
from MATERIALIZED_VIEWS.CLEANED_MARKET_PRICES
join finance.all_cogs f on f.product_id = MATERIALIZED_VIEWS.CLEANED_MARKET_PRICES.product_id and CURRENT_TIMESTAMP between f.from_date and f.to_date 
where date>= current_date -5
and MARKET_PRICE between f.wac_p * 0.9 and wac_p*1.3
qualify date = max_date 
) x 
left join whs on whs.region = x.region
group by all 
'''

print("Fetching scraped competitor prices...")
scrapped_prices = snowflake_query("Egypt", query)
scrapped_prices.columns = scrapped_prices.columns.str.lower()

for col in scrapped_prices.columns:
    scrapped_prices[col] = pd.to_numeric(scrapped_prices[col], errors='ignore')

print(f"✓ Retrieved scraped prices for {len(scrapped_prices)} products")

Fetching scraped competitor prices...
✓ Retrieved scraped prices for 10257 products


### 4.4 Product Statistics (Margin Boundaries)


In [22]:
query = f'''
SELECT 
    region,
    product_id,
    optimal_bm,
    MIN_BOUNDARY,
    MAX_BOUNDARY,
    MEDIAN_BM
FROM (
    SELECT 
        region,
        product_id,
        target_bm,
        optimal_bm,
        MIN_BOUNDARY,
        MAX_BOUNDARY,
        MEDIAN_BM,
        MAX(created_at) OVER (PARTITION BY product_id, region) as max_date,
        created_at
    FROM materialized_views.PRODUCT_STATISTICS
    WHERE created_at::date >= DATE_TRUNC('month', CURRENT_DATE - 60)
    QUALIFY max_date = created_at
)
'''

print("Fetching product statistics (margin boundaries)...")
stats = snowflake_query("Egypt", query)
stats.columns = stats.columns.str.lower()

for col in stats.columns:
    stats[col] = pd.to_numeric(stats[col], errors='ignore')

print(f"✓ Retrieved margin statistics for {len(stats)} products")

Fetching product statistics (margin boundaries)...
✓ Retrieved margin statistics for 18160 products


### 4.5 Warehouse-Region Mapping


In [23]:
query = f'''
SELECT warehouse_id, region
FROM (
    SELECT *, ROW_NUMBER() OVER (PARTITION BY warehouse_id ORDER BY nmv DESC) as rnk 
    FROM (
        SELECT 
            CASE WHEN regions.id = 2 THEN cities.name_en ELSE regions.name_en END as region,
            pso.warehouse_id,
            SUM(pso.total_price) as nmv
        FROM product_sales_order pso
        JOIN sales_orders so ON so.id = pso.sales_order_id
        JOIN materialized_views.retailer_polygon ON materialized_views.retailer_polygon.retailer_id = so.retailer_id
        JOIN districts ON districts.id = materialized_views.retailer_polygon.district_id
        JOIN cities ON cities.id = districts.city_id
        JOIN states ON states.id = cities.state_id
        JOIN regions ON regions.id = states.region_id             
        WHERE TRUE
            AND so.created_at::date BETWEEN CURRENT_DATE - 31 AND CURRENT_DATE - 1
            AND so.sales_order_status_id NOT IN (7, 12)
            AND so.channel IN ('telesales', 'retailer')
            AND pso.purchased_item_count <> 0
        GROUP BY ALL
    )
    QUALIFY rnk = 1 
)
'''

print("Fetching warehouse-region mapping...")
warehouse_region = snowflake_query("Egypt", query)
warehouse_region.columns = warehouse_region.columns.str.lower()

for col in warehouse_region.columns:
    warehouse_region[col] = pd.to_numeric(warehouse_region[col], errors='ignore')

print(f"✓ Mapped {len(warehouse_region)} warehouses to regions")

Fetching warehouse-region mapping...
✓ Mapped 15 warehouses to regions


### 4.6 Target Margins (Brand/Category)


In [24]:
# Brand-level target margins
query = f'''
SELECT DISTINCT cat, brand, margin as target_bm
FROM performance.commercial_targets cplan
QUALIFY 
    CASE 
        WHEN DATE_TRUNC('month', MAX(DATE) OVER()) = DATE_TRUNC('month', CURRENT_DATE) 
        THEN DATE_TRUNC('month', CURRENT_DATE)
        ELSE DATE_TRUNC('month', CURRENT_DATE - INTERVAL '1 month') 
    END = DATE_TRUNC('month', date)
'''

print("Fetching brand target margins...")
brand_cat_target = snowflake_query("Egypt", query)
brand_cat_target['target_bm'] = pd.to_numeric(brand_cat_target['target_bm'])
print(f"✓ Retrieved targets for {len(brand_cat_target)} brand-category combinations")

# Category-level weighted target margins
query = f'''
SELECT cat, SUM(target_bm * (target_nmv / cat_total)) as cat_target_margin
FROM (
    SELECT *, SUM(target_nmv) OVER (PARTITION BY cat) as cat_total
    FROM (
        SELECT cat, brand, AVG(target_bm) as target_bm, SUM(target_nmv) as target_nmv
        FROM (
            SELECT DISTINCT 
                date, 
                city as region, 
                cat, 
                brand, 
                margin as target_bm, 
                nmv as target_nmv
            FROM performance.commercial_targets cplan
            QUALIFY 
                CASE 
                    WHEN DATE_TRUNC('month', MAX(DATE) OVER()) = DATE_TRUNC('month', CURRENT_DATE) 
                    THEN DATE_TRUNC('month', CURRENT_DATE)
                    ELSE DATE_TRUNC('month', CURRENT_DATE - INTERVAL '1 month') 
                END = DATE_TRUNC('month', date)
        )
        GROUP BY ALL
    )
)
GROUP BY ALL 
'''

print("Fetching category target margins...")
cat_target = snowflake_query("Egypt", query)
cat_target['cat_target_margin'] = pd.to_numeric(cat_target['cat_target_margin'])
print(f"✓ Retrieved targets for {len(cat_target)} categories")

Fetching brand target margins...
✓ Retrieved targets for 478 brand-category combinations
Fetching category target margins...
✓ Retrieved targets for 73 categories


### 4.7 Merge All Data Sources


### LIVE CART Rules

In [25]:
query = '''
SELECT 
    cppu.cohort_id,
    product_id,
    packing_unit_id,
    basic_unit_count,
    COALESCE(cppu.MAX_PER_SALES_ORDER, cppu2.MAX_PER_SALES_ORDER) as current_cart_rule
FROM COHORT_PRODUCT_PACKING_UNITS cppu 
JOIN PACKING_UNIT_PRODUCTS pup ON cppu.PRODUCT_PACKING_UNIT_ID = pup.id 
JOIN cohorts c ON c.id = cppu.cohort_id
JOIN COHORT_PRODUCT_PACKING_UNITS cppu2 
    ON cppu.PRODUCT_PACKING_UNIT_ID = cppu2.PRODUCT_PACKING_UNIT_ID 
    AND cppu2.cohort_id = c.FALLBACK_COHORT_ID 
WHERE cppu.cohort_id IN (700, 701, 702, 703, 704, 1123, 1124, 1125, 1126)
'''

print("Fetching live cart rules...")
live_cart_rules = snowflake_query("Egypt", query) 
live_cart_rules.columns = live_cart_rules.columns.str.lower()

for col in live_cart_rules.columns:
    live_cart_rules[col] = pd.to_numeric(live_cart_rules[col], errors='ignore')

print(f"✓ Retrieved {len(live_cart_rules)} cart rules")

Fetching live cart rules...
✓ Retrieved 110660 cart rules


In [26]:
# =============================================================================
# MERGE ALL DATA SOURCES
# =============================================================================

print("Merging all data sources...")

# Start with selected products + tier quantities
final_data = selected_products.merge(
    tiers_selection[[
        'warehouse_id', 'product_id', 'packing_unit_id',
        'tier_1_qty', 'tier_2_qty', 'median_qty', 'stddev_qty',
        'tier_1_increase_pct', 'tier_2_increase_pct'
    ]],
    on=['warehouse_id', 'product_id', 'packing_unit_id']
)

# Select relevant columns
final_data = final_data[[
    'warehouse_id', 'product_id', 'packing_unit_id', 'sku', 'brand', 'cat',
    'packing_unit_price', 'basic_unit_count', 
    'tier_1_qty', 'tier_2_qty', 'median_qty', 'stddev_qty',
    'tier_1_increase_pct', 'tier_2_increase_pct', 'final_rank'
]]

# Add WAC (weighted average cost)
final_data = final_data.merge(sku_info[['product_id', 'wac_p']], on='product_id')
final_data['wac_p'] = (final_data['wac_p'] * final_data['basic_unit_count']).round(2)

# Add marketplace prices
final_data = final_data.merge(marketplace, on=['product_id', 'warehouse_id'], how='left')
final_data = final_data.drop(columns='region')

# Add competitor prices
final_data = final_data.merge(bensoliman[['product_id', 'ben_soliman_price']], on=['product_id'], how='left')
final_data = final_data.merge(scrapped_prices, on=['product_id', 'warehouse_id'], how='left')
final_data = final_data.drop(columns='region')

# Add region and margin data
final_data = final_data.merge(warehouse_region, on=['warehouse_id'])
final_data = final_data.merge(stats, on=['product_id', 'region'], how='left')
final_data = final_data.merge(brand_cat_target, on=['brand', 'cat'], how='left')
final_data = final_data.merge(cat_target, on=['cat'], how='left')

# Use brand target margin, fall back to category target margin
final_data['Target_margin'] = final_data['target_bm'].fillna(final_data['cat_target_margin'])

print(f"✓ Merged data: {len(final_data)} products with all pricing data")

Merging all data sources...
✓ Merged data: 3955 products with all pricing data


In [27]:
# Cohort to Warehouse mapping
mapping_coh_wh = pd.DataFrame({
    'region':       ['Cairo', 'Cairo', 'Giza', 'Delta West', 'Delta West', 'Delta East', 
                     'Delta East', 'Upper Egypt', 'Upper Egypt', 'Upper Egypt', 'Upper Egypt', 
                     'Alexandria', 'Giza'],
    'wh':           ['El-Marg', 'Mostorod', 'Barageel', 'El-Mahala', 'Tanta', 'Mansoura FC',
                     'Sharqya', 'Assiut FC', 'Bani sweif', 'Menya Samalot', 'Sohag',
                     'Khorshed Alex', 'Sakkarah'],
    'warehouse_id': [38, 1, 236, 337, 8, 339, 170, 501, 401, 703, 632, 797, 962],
    'cohort_id':    [700, 700, 701, 703, 703, 704, 704, 1124, 1126, 1123, 1125, 702, 701]
})


In [28]:
# Add warehouse mapping to cart rules
live_cart_rules = live_cart_rules.merge(mapping_coh_wh, on='cohort_id')
print(f"✓ Cart rules mapped to {live_cart_rules['warehouse_id'].nunique()} warehouses")

✓ Cart rules mapped to 13 warehouses


## 5. Price Tier Calculation

Calculate tier 1 and tier 2 prices with constraints:
- **Max discount**: 5% from current price
- **Min discount**: 0.35% from current price  
- **Ratio bounds**: discount-to-quantity ratio between 1.05 and 3
- **Price ordering**: WAC < Tier 2 < Tier 1 < Current Price
- **Tier 1 margin constraint**: margin >= 60% of current margin
- **Tier 2 margin constraint**: margin >= 40% of current margin


### 5.1 Price Calculation Functions

The `calculate_tier_prices` function uses multiple strategies:
1. **Market prices strategy** - Use competitive pricing data if available
2. **Margin range strategy** - Calculate from margin boundaries if no market data
3. **Ratio adjustment** - Adjust tier_2 price to meet discount-to-quantity ratio bounds


In [29]:
def calculate_tier_prices(row, max_discount_pct=5.0, min_discount_pct=0.35, min_ratio=1.1, max_ratio=3):
    """
    Calculate tier 1 and tier 2 prices for a single row.
    
    Parameters:
    - max_discount_pct: Maximum allowed discount from current price (default: 5%)
    - min_discount_pct: Minimum required discount from current price (default: 0.35%)
    - min_ratio: Minimum discount-to-quantity ratio (default: 1.3)
    - max_ratio: Maximum discount-to-quantity ratio (default: 3.5)
    
    Constraints:
    - Tier 1 margin must be >= 60% of current margin
    - Tier 2 margin must be >= 40% of current margin
    - Ensure: WAC < Tier 2 < Tier 1 < Current Price
    - Ensure: BOTH tiers must be valid or BOTH are None
    - Ensure: discount_qty_ratio = (tier_2_discount/tier_1_discount) / (tier_2_qty/tier_1_qty) is between min_ratio and max_ratio
    """
    
    current_price = row['packing_unit_price']
    wac = row['wac_p']
    
    # Get basic_unit_count for converting market prices
    basic_unit_count = row.get('basic_unit_count', 1)
    if pd.isna(basic_unit_count) or basic_unit_count <= 0:
        basic_unit_count = 1
    
    # Validation
    if pd.isna(current_price) or current_price <= 0:
        return pd.Series({'tier_1_price': np.nan, 'tier_2_price': np.nan, 'price_source': 'invalid_current_price'})
    
    if pd.isna(wac) or wac <= 0:
        return pd.Series({'tier_1_price': np.nan, 'tier_2_price': np.nan, 'price_source': 'invalid_wac'})
    
    if current_price <= wac:
        return pd.Series({'tier_1_price': np.nan, 'tier_2_price': np.nan, 'price_source': 'current_price_below_wac'})
    
    # Calculate discount bounds
    max_discount_price = current_price * (1 - max_discount_pct / 100)  # Minimum allowed price
    min_discount_price = current_price * (1 - min_discount_pct / 100)  # Maximum allowed price
    
    # ==========================================================================
    # MARGIN-BASED MINIMUM PRICES (NEW CONSTRAINT)
    # ==========================================================================
    # Calculate current margin: margin = (price - wac) / price
    current_margin = (current_price - wac) / current_price
    
    # Tier 1: minimum margin = 60% of current margin
    min_t1_margin = 0.60 * current_margin
    # Calculate minimum T1 price: price = wac / (1 - margin)
    min_t1_price = wac / (1 - min_t1_margin) if min_t1_margin < 1 else current_price
    
    # Tier 2: minimum margin = 40% of current margin
    min_t2_margin = 0.40 * current_margin
    # Calculate minimum T2 price: price = wac / (1 - margin)
    min_t2_price = wac / (1 - min_t2_margin) if min_t2_margin < 1 else current_price
    
    # ==========================================================================
    # ABSOLUTE MINIMUM PRICE (legacy constraint - use higher of this or margin-based)
    # ==========================================================================
    absolute_min_price = wac*0.9  # Default to WAC if no target_margin
    
    if 'target_margin' in row.index and pd.notna(row['target_margin']) and 0 < row['target_margin'] < 1:
        target_margin = row['target_margin']
        # Minimum margin is 30% of target margin
        min_margin = target_margin * 0.5
        # Calculate minimum price: price = wac / (1 - min_margin)
        absolute_min_price = wac / (1 - min_margin)
    else:
        # Fallback: use wac_cushion_pct
        wac_cushion_pct = 0.25
        absolute_min_price = wac / (1 - (wac_cushion_pct / 100))
    
    # Use the HIGHER of absolute_min_price or margin-based minimums
    min_t1_price = max(min_t1_price, absolute_min_price)
    min_t2_price = max(min_t2_price, absolute_min_price)
    
    # Market price columns (these are per basic unit)
    market_cols = [
        'final_mod_price', 'median_scrapped', 'final_max_price', 
        'ben_soliman_price', 'max_scrapped', 'final_true_max',
        'final_min_price', 'min_scrapped', 'final_true_min'
    ]
    
    # Extract valid market prices (multiply by basic_unit_count, above absolute_min_price, within discount bounds)
    valid_market_prices = []
    for col in market_cols:
        if col in row.index and pd.notna(row[col]) and row[col] > 0:
            # Convert basic unit price to packing unit price
            packing_price = row[col] * basic_unit_count
            
            # Must be: above absolute_min_price AND within discount bounds
            if max_discount_price <= packing_price <= min_discount_price and packing_price > (wac*basic_unit_count)*0.9:
                valid_market_prices.append(packing_price)
    
    # Remove duplicates and sort descending
    valid_market_prices = sorted(list(set(valid_market_prices)), reverse=True)
    
    tier_1 = None
    tier_2 = None
    source = ''
    
    min_gap_pct = 0.25
    
    # Strategy 1: Use market prices
    if len(valid_market_prices) >= 3:
        # Select from available prices
        tier_1 = valid_market_prices[0]  # Highest price
        
        # Find tier 2 with minimum gap
        for price in valid_market_prices[1:]:
            if price < tier_1 * (1 - min_gap_pct / 100):
                tier_2 = price
                break
        
        # If no suitable tier 2 found, take second highest
        if tier_2 is None and len(valid_market_prices) > 1:
            tier_2 = valid_market_prices[1]
        
        if tier_1 is not None and tier_2 is not None:
            source = 'market_prices'
    
    elif len(valid_market_prices) == 2:
        tier_1 = valid_market_prices[0]
        tier_2 = valid_market_prices[1]
        source = 'market_prices'
    
    elif len(valid_market_prices) == 1:
        # Only one market price - use margin range for the other
        market_price = valid_market_prices[0]
        
        # Calculate which tier this should be based on its position
        price_position = (market_price - max_discount_price) / (min_discount_price - max_discount_price)
        
        # If in upper half (>0.5), use as tier 1 and calculate tier 2
        # If in lower half (<=0.5), use as tier 2 and calculate tier 1
        if price_position > 0.5:
            tier_1 = market_price
            tier_2 = calculate_from_margin_range(row, wac, current_price, tier_1, tier=2, 
                                                 max_discount_price=max_discount_price,
                                                 min_discount_price=min_discount_price,
                                                 absolute_min_price=absolute_min_price)
            if tier_2 is not None:
                source = 'market_tier1_margin_tier2'
        else:
            tier_2 = market_price
            tier_1 = calculate_from_margin_range(row, wac, current_price, tier_2, tier=1,
                                                 max_discount_price=max_discount_price,
                                                 min_discount_price=min_discount_price,
                                                 absolute_min_price=absolute_min_price)
            if tier_1 is not None:
                source = 'margin_tier1_market_tier2'
    
    # Strategy 2: No market prices - use margin range method
    if tier_1 is None or tier_2 is None:
        tier_1, tier_2 = calculate_both_from_margin_range(row, wac, current_price,
                                                          max_discount_price=max_discount_price,
                                                          min_discount_price=min_discount_price,
                                                          absolute_min_price=absolute_min_price)
        if tier_1 is not None and tier_2 is not None:
            source = 'margin_range_based'
    
    # CRITICAL: Final validation - BOTH must be valid or BOTH are None
    if tier_1 is not None and tier_2 is not None:
        # Ensure correct ordering
        if tier_2 >= tier_1:
            tier_1, tier_2 = max(tier_1, tier_2), min(tier_1, tier_2)
        
        # Apply discount bounds
        tier_1 = max(tier_1, max_discount_price)
        tier_1 = min(tier_1, min_discount_price)
        tier_2 = max(tier_2, max_discount_price)
        tier_2 = min(tier_2, min_discount_price)
        
        # ==========================================================================
        # ENFORCE MARGIN-BASED MINIMUM PRICES (NEW CONSTRAINT)
        # Tier 1: must maintain >= 60% of current margin
        # Tier 2: must maintain >= 40% of current margin
        # ==========================================================================
        # Enforce T1 minimum (60% of current margin)
        if tier_1 < min_t1_price:
            tier_1 = min_t1_price
        
        # Enforce T2 minimum (40% of current margin)
        if tier_2 < min_t2_price:
            tier_2 = min_t2_price
        
        # Check if prices are still valid after enforcement
        if tier_1 <= wac or tier_2 <= wac:
            tier_1 = None
            tier_2 = None
            source = 'prices_below_wac_after_margin_enforcement'
        elif tier_2 >= tier_1:
            # T2 minimum pushed it above T1 - invalid
            tier_1 = None
            tier_2 = None
            source = 'margin_constraints_conflict'
        elif tier_1 > min_discount_price or tier_2 > min_discount_price:
            # Prices pushed above max allowed (min discount)
            tier_1 = None
            tier_2 = None
            source = 'margin_constraints_exceed_discount_bounds'
        else:
            # Ensure minimum gap between tiers
            if tier_2 > tier_1 * (1 - min_gap_pct / 100):
                tier_2 = tier_1 * (1 - min_gap_pct / 100)
                # Re-check T2 minimum after gap adjustment
                if tier_2 < min_t2_price:
                    tier_1 = None
                    tier_2 = None
                    source = 'insufficient_gap_with_margin_constraint'
            
            # Final check: both still valid?
            if tier_1 is not None and tier_2 is not None:
                if not (wac < tier_2 < tier_1 < current_price):
                    tier_1 = None
                    tier_2 = None
                    source = 'invalid_tier_ordering'
                elif not (max_discount_price <= tier_2 and tier_1 <= min_discount_price):
                    tier_1 = None
                    tier_2 = None
                    source = 'tiers_outside_discount_bounds'
                else:
                    tier_1 = round(tier_1, 2)
                    tier_2 = round(tier_2, 2)
                    
                    # =================================================================
                    # RATIO ADJUSTMENT (adjust tier_2 price if ratio out of bounds)
                    # =================================================================
                    tier_1_qty = row.get('tier_1_qty', None)
                    tier_2_qty = row.get('tier_2_qty', None)
                    
                    if tier_1_qty is not None and tier_2_qty is not None and tier_1_qty > 0:
                        tier_1_discount = current_price - tier_1
                        tier_2_discount = current_price - tier_2
                        
                        if tier_1_discount > 0:
                            diff_quantity = tier_2_qty / tier_1_qty
                            diff_discount = tier_2_discount / tier_1_discount
                            
                            if diff_quantity > 0:
                                discount_qty_ratio = diff_discount / diff_quantity
                                
                                # If ratio too high, reduce T2 discount
                                if discount_qty_ratio > max_ratio:
                                    target_tier_2_discount = max_ratio * diff_quantity * tier_1_discount
                                    adjusted_tier_2 = current_price - target_tier_2_discount
                                    
                                    # Check against T2 minimum (40% of current margin)
                                    if adjusted_tier_2 > wac and adjusted_tier_2 >= min_t2_price and adjusted_tier_2 < tier_1:
                                        tier_2 = round(adjusted_tier_2, 2)
                                        source = source + '_ratio_down'
                                    else:
                                        tier_1 = None
                                        tier_2 = None
                                        source = f'cannot_ratio_{discount_qty_ratio:.2f}_max_margin_constraint'
                                
                                # If ratio too low, increase T2 discount
                                elif discount_qty_ratio < min_ratio:
                                    target_tier_2_discount = min_ratio * diff_quantity * tier_1_discount
                                    adjusted_tier_2 = current_price - target_tier_2_discount
                                    
                                    # Check against T2 minimum (40% of current margin)
                                    if adjusted_tier_2 > wac and adjusted_tier_2 >= min_t2_price and adjusted_tier_2 < tier_1:
                                        tier_2 = round(adjusted_tier_2, 2)
                                        source = source + '_ratio_up'
                                    else:
                                        tier_1 = None
                                        tier_2 = None
                                        source = f'cannot_ratio_{discount_qty_ratio:.2f}_min_margin_constraint'
    
    # FINAL CHECK: If only one tier exists, invalidate both
    if (tier_1 is None and tier_2 is not None) or (tier_1 is not None and tier_2 is None):
        tier_1 = None
        tier_2 = None
        source = 'incomplete_tier_pair'
    
    # If both are None and no source set, mark it
    if tier_1 is None and tier_2 is None and source == '':
        source = 'no_valid_prices'
    
    return pd.Series({
        'tier_1_price': tier_1,
        'tier_2_price': tier_2,
        'price_source': source
    })


def calculate_both_from_margin_range(row, wac, current_price, max_discount_price, min_discount_price, absolute_min_price):
    """
    Calculate both tier prices using margin range from minimum of (min_boundary, optimal_bm) to current margin.
    Returns (tier_1_price, tier_2_price) or (None, None)
    """
    
    # Calculate current margin: margin = (price - wac) / price
    current_margin = (current_price - wac) / current_price
    
    # Get min_boundary margin
    min_boundary_margin = None
    if 'min_boundary' in row.index and pd.notna(row['min_boundary']) and 0 < row['min_boundary'] < 1:
        min_boundary_margin = row['min_boundary']
    
    # Get optimal_bm margin
    optimal_margin = None
    if 'optimal_bm' in row.index and pd.notna(row['optimal_bm']) and 0 < row['optimal_bm'] < 1:
        optimal_margin = row['optimal_bm']
    
    # Determine starting margin: minimum of (min_boundary, optimal_bm)
    start_margin = None
    
    if min_boundary_margin is not None and optimal_margin is not None:
        start_margin = min(min_boundary_margin, optimal_margin)
    elif min_boundary_margin is not None:
        start_margin = min_boundary_margin
    elif optimal_margin is not None:
        start_margin = optimal_margin
    else:
        # Fallback: use 50% of current margin
        start_margin = current_margin * 0.85
    
    # Ensure start_margin is less than current margin
    if start_margin >= current_margin:
        start_margin = current_margin * 0.85
    
    # Generate margin points in the range (10 points)
    num_points = 10
    margin_range = np.linspace(start_margin, current_margin, num_points)
    
    # Calculate prices from these margins: price = wac / (1 - margin)
    price_candidates = []
    for margin in margin_range:
        if margin < 1:
            price = wac / (1 - margin)
            # Only keep prices within discount bounds and above absolute_min_price
            if  max_discount_price <= price <= min_discount_price and price > wac*0.9:
                price_candidates.append(price)
    
    if len(price_candidates) < 2:
        return None, None
    
    # Sort prices descending
    price_candidates = sorted(price_candidates, reverse=True)
    
    # Select Tier 1: closer to the top (less discount)
    # Select Tier 2: further down (more discount)
    tier_1_idx = int(len(price_candidates) * 0.25)  # 25% from top
    tier_2_idx = int(len(price_candidates) * 0.65)  # 65% from top
    
    # Ensure valid indices
    tier_1_idx = max(0, min(tier_1_idx, len(price_candidates) - 2))
    tier_2_idx = max(tier_1_idx + 1, min(tier_2_idx, len(price_candidates) - 1))
    
    tier_1 = price_candidates[tier_1_idx]
    tier_2 = price_candidates[tier_2_idx]
    
    # Ensure meaningful gap (at least 0.5%)
    min_gap_pct = 0.25
    if tier_2 > tier_1 * (1 - min_gap_pct / 100):
        # Try to find better tier_2
        for i in range(tier_2_idx + 1, len(price_candidates)):
            if price_candidates[i] < tier_1 * (1 - min_gap_pct / 100):
                tier_2 = price_candidates[i]
                break
    
    # Final validation
    if tier_2 >= tier_1 or tier_1 <= wac*0.9 or tier_2 <= wac*0.9:
        return None, None
    
    return tier_1, tier_2


def calculate_from_margin_range(row, wac, current_price, other_tier_price, tier, 
                                max_discount_price, min_discount_price, absolute_min_price):
    """
    Calculate single tier price using margin range.
    Used when one tier is from market and we need to calculate the other.
    """
    
    # Calculate current margin
    current_margin = (current_price - wac) / current_price
    
    # Get min_boundary margin
    min_boundary_margin = None
    if 'min_boundary' in row.index and pd.notna(row['min_boundary']) and 0 < row['min_boundary'] < 1:
        min_boundary_margin = row['min_boundary']
    
    # Get optimal_bm margin
    optimal_margin = None
    if 'optimal_bm' in row.index and pd.notna(row['optimal_bm']) and 0 < row['optimal_bm'] < 1:
        optimal_margin = row['optimal_bm']
    
    # Determine starting margin: minimum of (min_boundary, optimal_bm)
    start_margin = None
    
    if min_boundary_margin is not None and optimal_margin is not None:
        start_margin = min(min_boundary_margin, optimal_margin)
    elif min_boundary_margin is not None:
        start_margin = min_boundary_margin
    elif optimal_margin is not None:
        start_margin = optimal_margin
    else:
        start_margin = current_margin * 0.5
    
    # Ensure start_margin is less than current margin
    if start_margin >= current_margin:
        start_margin = current_margin * 0.7
    
    # Generate margin range (10 points)
    num_points = 10
    margin_range = np.linspace(start_margin, current_margin, num_points)
    
    # Calculate prices
    price_candidates = []
    for margin in margin_range:
        if margin < 1:
            price = wac / (1 - margin)
            if max_discount_price <= price <= min_discount_price and price >= wac*0.9:
                price_candidates.append(price)
    
    if len(price_candidates) == 0:
        return None
    
    # Sort prices descending
    price_candidates = sorted(price_candidates, reverse=True)
    
    min_gap_pct = 0.5
    
    if tier == 1:
        # Need tier 1 (higher price), we have tier 2 (lower price)
        # Find prices above tier 2 with proper gap
        target_candidates = [p for p in price_candidates 
                           if p > other_tier_price * (1 + min_gap_pct / 100)]
        if target_candidates:
            # Take from upper portion (25% position)
            idx = int(len(target_candidates) * 0.25)
            return target_candidates[idx]
        return None
    
    else:
        # Need tier 2 (lower price), we have tier 1 (higher price)
        # Find prices below tier 1 with proper gap
        target_candidates = [p for p in price_candidates 
                           if p < other_tier_price * (1 - min_gap_pct / 100)]
        if target_candidates:
            # Take from lower portion (65% position)
            idx = int(len(target_candidates) * 0.65)
            idx = min(idx, len(target_candidates) - 1)
            return target_candidates[idx]
        return None


### 5.2 Apply Price Calculations


In [30]:
# =============================================================================
# APPLY PRICE CALCULATIONS
# =============================================================================

# Normalize column names
final_data.columns = final_data.columns.str.lower()

print(f"Processing {len(final_data)} SKUs...")
print(f"Parameters: MAX_DISCOUNT={MAX_DISCOUNT_PCT}%, MIN_DISCOUNT={MIN_DISCOUNT_PCT}%, RATIO=[{MIN_RATIO}, {MAX_RATIO}]")

# Apply price calculation to each row
result = final_data.apply(
    lambda row: calculate_tier_prices(
        row, 
        max_discount_pct=MAX_DISCOUNT_PCT,
        min_discount_pct=MIN_DISCOUNT_PCT,
        min_ratio=MIN_RATIO,
        max_ratio=MAX_RATIO
    ), 
    axis=1
)

# Merge results back to dataframe
final_data = pd.concat([final_data, result], axis=1)

# Summary of ratio adjustments
ratio_adjusted_down = final_data['price_source'].str.contains('ratio_down', na=False).sum()
ratio_adjusted_up = final_data['price_source'].str.contains('ratio_up', na=False).sum()
cannot_adjust = final_data['price_source'].str.contains('cannot', na=False).sum()

print(f"\n--- Ratio Adjustment Summary ---")
print(f"  Ratio adjusted down (was above {MAX_RATIO}): {ratio_adjusted_down} SKUs")
print(f"  Ratio adjusted up (was below {MIN_RATIO}):   {ratio_adjusted_up} SKUs")
print(f"  Could not adjust (constraints violated):  {cannot_adjust} SKUs")

# Filter to only products with valid tier prices
final_data = final_data[
    (~final_data['tier_1_price'].isna()) & 
    (~final_data['tier_2_price'].isna())
]

print(f"\n✓ Final SKUs with valid tier prices: {len(final_data)}")

Processing 3955 SKUs...
Parameters: MAX_DISCOUNT=5.0%, MIN_DISCOUNT=0.35%, RATIO=[1.05, 3]

--- Ratio Adjustment Summary ---
  Ratio adjusted down (was above 3): 119 SKUs
  Ratio adjusted up (was below 1.05):   1512 SKUs
  Could not adjust (constraints violated):  256 SKUs

✓ Final SKUs with valid tier prices: 3086


In [31]:
# =============================================================================
# VALIDATE AND APPLY DISCOUNT CONSTRAINTS FROM FEEDBACK
# =============================================================================
# Constraints:
# 1. T1 discount < T2 discount < T3 discount (WS discount)
# 2. Elasticity ratio >= 1.1 between consecutive tiers

if len(feedback_data) > 0:
    print("\n" + "="*80)
    print("APPLYING DISCOUNT CONSTRAINTS FROM FEEDBACK")
    print("="*80)
    
    # Merge feedback discount suggestions
    final_data = final_data.merge(
        feedback_data[['warehouse_id', 'product_id', 'packing_unit_id',
                       'suggested_t1_discount', 'suggested_t2_discount',
                       't1_action', 't2_action']],
        on=['warehouse_id', 'product_id', 'packing_unit_id'],
        how='left'
    )
    
    # ==========================================================================
    # SMART T1 DISCOUNT ADJUSTMENTS
    # Only apply if recommendation actually improves the current value
    # ==========================================================================
    mask_d1 = final_data['suggested_t1_discount'].notna()
    d1_applied = 0
    d1_skipped = 0
    final_data['discount_1'] = (final_data['packing_unit_price'] - final_data['tier_1_price']).round(2)
    final_data['discount_2'] = (final_data['packing_unit_price'] - final_data['tier_2_price']).round(2)
    final_data['discount_1_pct'] = ((final_data['discount_1'] / final_data['packing_unit_price']) * 100).round(2)
    final_data['discount_2_pct'] = ((final_data['discount_2'] / final_data['packing_unit_price']) * 100).round(2)
    if mask_d1.sum() > 0:
        original_d1 = final_data.loc[mask_d1, 'discount_1_pct'].copy()
        suggested_d1 = final_data.loc[mask_d1, 'suggested_t1_discount']
        d1_action = final_data.loc[mask_d1, 't1_action'].fillna('')
        
        # Skip if: action is INCREASE_DISCOUNT and script already >= suggested
        # Skip if: action is DECREASE_DISCOUNT/REDUCE and script already <= suggested
        skip_d1_increase = d1_action.str.contains('INCREASE_DISCOUNT', case=False, na=False) & (original_d1 >= suggested_d1)
        skip_d1_decrease = d1_action.str.contains('DECREASE_DISCOUNT|REDUCE_DISCOUNT', case=False, na=False) & (original_d1 <= suggested_d1)
        skip_d1 = skip_d1_increase | skip_d1_decrease
        
        # Apply only where NOT skipped
        apply_d1 = mask_d1.copy()
        apply_d1.loc[mask_d1] = ~skip_d1.values
        
        if apply_d1.sum() > 0:
            final_data.loc[apply_d1, 'discount_1_pct'] = final_data.loc[apply_d1, 'suggested_t1_discount']
            final_data.loc[apply_d1, 'tier_1_price'] = (
                final_data.loc[apply_d1, 'packing_unit_price'] * 
                (1 - final_data.loc[apply_d1, 'suggested_t1_discount'] / 100)
            ).round(2)
            d1_applied = apply_d1.sum()
            print(f"  ✓ Applied T1 discount adjustments: {d1_applied} SKUs")
        
        d1_skipped = skip_d1.sum()
        if d1_skipped > 0:
            print(f"  ⚠ Skipped T1 discount for {d1_skipped} SKUs (script already better)")
    
    # ==========================================================================
    # SMART T2 DISCOUNT ADJUSTMENTS
    # Only apply if recommendation actually improves the current value
    # ==========================================================================
    mask_d2 = final_data['suggested_t2_discount'].notna()
    d2_applied = 0
    d2_skipped = 0
    
    if mask_d2.sum() > 0:
        original_d2 = final_data.loc[mask_d2, 'discount_2_pct'].copy()
        suggested_d2 = final_data.loc[mask_d2, 'suggested_t2_discount']
        d2_action = final_data.loc[mask_d2, 't2_action'].fillna('')
        
        # Skip if: action is INCREASE_DISCOUNT and script already >= suggested
        # Skip if: action is DECREASE_DISCOUNT/REDUCE and script already <= suggested
        skip_d2_increase = d2_action.str.contains('INCREASE_DISCOUNT', case=False, na=False) & (original_d2 >= suggested_d2)
        skip_d2_decrease = d2_action.str.contains('DECREASE_DISCOUNT|REDUCE_DISCOUNT', case=False, na=False) & (original_d2 <= suggested_d2)
        skip_d2 = skip_d2_increase | skip_d2_decrease
        
        # Apply only where NOT skipped
        apply_d2 = mask_d2.copy()
        apply_d2.loc[mask_d2] = ~skip_d2.values
        
        if apply_d2.sum() > 0:
            final_data.loc[apply_d2, 'discount_2_pct'] = final_data.loc[apply_d2, 'suggested_t2_discount']
            final_data.loc[apply_d2, 'tier_2_price'] = (
                final_data.loc[apply_d2, 'packing_unit_price'] * 
                (1 - final_data.loc[apply_d2, 'suggested_t2_discount'] / 100)
            ).round(2)
            d2_applied = apply_d2.sum()
            print(f"  ✓ Applied T2 discount adjustments: {d2_applied} SKUs")
        
        d2_skipped = skip_d2.sum()
        if d2_skipped > 0:
            print(f"  ⚠ Skipped T2 discount for {d2_skipped} SKUs (script already better)")
    
    # Drop action columns (no longer needed)
    final_data = final_data.drop(columns=['t1_action', 't2_action'], errors='ignore')
    
    # ==========================================================================
    # VALIDATE DISCOUNT CONSTRAINTS
    # ==========================================================================
    print("\n  Validating discount constraints...")
    
    # Constraint 1: T2 discount must be > T1 discount
    invalid_disc = final_data['discount_2_pct'] <= final_data['discount_1_pct']
    if invalid_disc.sum() > 0:
        # Fix: Set T2 discount = T1 discount + 0.5
        final_data.loc[invalid_disc, 'discount_2_pct'] = final_data.loc[invalid_disc, 'discount_1_pct'] + 0.5
        final_data.loc[invalid_disc, 'tier_2_price'] = (
            final_data.loc[invalid_disc, 'packing_unit_price'] * 
            (1 - final_data.loc[invalid_disc, 'discount_2_pct'] / 100)
        ).round(2)
        print(f"    Fixed {invalid_disc.sum()} SKUs where T2 discount <= T1 discount")
    
    # Constraint 2: Elasticity ratio >= MIN_ELASTICITY_RATIO
    qty_ratio = final_data['tier_2_qty'] / final_data['tier_1_qty']
    disc_ratio = final_data['discount_2_pct'] / final_data['discount_1_pct'].replace(0, np.nan)
    elasticity = disc_ratio / qty_ratio
    
    low_elasticity = elasticity < MIN_ELASTICITY_RATIO
    if low_elasticity.sum() > 0:
        # Fix: Increase T2 discount to meet minimum elasticity
        required_disc_ratio = MIN_ELASTICITY_RATIO * qty_ratio
        new_d2 = final_data['discount_1_pct'] * required_disc_ratio
        final_data.loc[low_elasticity, 'discount_2_pct'] = new_d2.loc[low_elasticity].round(2)
        final_data.loc[low_elasticity, 'tier_2_price'] = (
            final_data.loc[low_elasticity, 'packing_unit_price'] * 
            (1 - final_data.loc[low_elasticity, 'discount_2_pct'] / 100)
        ).round(2)
        print(f"    Fixed {low_elasticity.sum()} SKUs with elasticity < {MIN_ELASTICITY_RATIO}")
    
    # Drop temporary columns
    final_data = final_data.drop(columns=['suggested_t1_discount', 'suggested_t2_discount'], errors='ignore')

# Calculate final elasticity statistics
qty_ratio_final = final_data['tier_2_qty'] / final_data['tier_1_qty']
disc_ratio_final = final_data['discount_2_pct'] / final_data['discount_1_pct'].replace(0, np.nan)
elasticity_final = disc_ratio_final / qty_ratio_final

print(f"\n  Final elasticity statistics:")
print(f"    Min: {elasticity_final.min():.2f}")
print(f"    Mean: {elasticity_final.mean():.2f}")
print(f"    Max: {elasticity_final.max():.2f}")
print(f"    SKUs below {MIN_ELASTICITY_RATIO}: {(elasticity_final < MIN_ELASTICITY_RATIO).sum()}")

print("\n" + "="*80)


APPLYING DISCOUNT CONSTRAINTS FROM FEEDBACK
  ✓ Applied T1 discount adjustments: 1128 SKUs
  ⚠ Skipped T1 discount for 50 SKUs (script already better)
  ✓ Applied T2 discount adjustments: 1100 SKUs
  ⚠ Skipped T2 discount for 78 SKUs (script already better)

  Validating discount constraints...
    Fixed 11 SKUs where T2 discount <= T1 discount
    Fixed 1444 SKUs with elasticity < 1.1

  Final elasticity statistics:
    Min: 1.09
    Mean: 1.56
    Max: 12.93
    SKUs below 1.1: 754



## 6. Wholesale Pricing

Calculate wholesale prices based on:
- Vehicle capacity (quarter truck)
- Rank-based margin tiers (20%, 25%, 40%, 60% of target margin)
- Must be below tier_2_price


In [32]:
# =============================================================================
# PREPARE DELIVERY FEE DATA
# =============================================================================

# Set delivery fees based on region
final_data['delivery_fees'] = DELIVERY_FEE_OTHER
final_data.loc[final_data['region'].isin(['Cairo', 'Giza']), 'delivery_fees'] = DELIVERY_FEE_CAIRO_GIZA

# Prepare query data for wholesale calculation
query_data = final_data[['warehouse_id', 'product_id', 'packing_unit_id', 'delivery_fees']].values.tolist()
query_info = ','.join([
    f"({int(wh_id)}, {int(prod_id)}, {int(pu_id)}, {int(delivery_fees)})" 
    for wh_id, prod_id, pu_id, delivery_fees in query_data
])

print(f"✓ Prepared {len(query_data)} products for wholesale calculation")

✓ Prepared 3086 products for wholesale calculation


In [33]:
query = f'''
with chosen_products as (
select *
from (
values 
{query_info}
)x(warehouse_id,product_id,packing_unit_id,delivery_fees)

),
vec as (
select  vt.id as vehicle_id,name_en as vehicle_name,vc.weight as vehicle_weight,vc.cbm as vehicle_cbm,900 as vehicle_cost
from VEHICLE_TYPES  vt 
join  RETOOL.VEHICLE_CAPACITIES vc on vc.vehicle_id = vt.id
where vehicle_id = 1
),
selected_products as (
select x.*,	(long*width*height)/1000000 AS cbm,weight/1000 AS weight,
from chosen_products x
join packing_unit_products on x.product_id = packing_unit_products.product_id and packing_unit_products.packing_unit_id = x.packing_unit_id
),
main_cte as (
select warehouse_id,product_id,packing_unit_id,delivery_fees,
ceil(least(quart_dababa_wht,quart_dababa_cbm)) as quart_dababa,
vehicle_cost
from (
select * ,
((vehicle_weight*0.9)/4)/weight as quart_dababa_wht , 
((vehicle_cbm*0.9)/4)/cbm as quart_dababa_cbm  
from (
select selected_products.*, vehicle_weight,vehicle_cbm,vehicle_cost
from selected_products,vec
)
)
)
select mc.*, f.wac_p , 
(f.wac_p*quart_dababa)+(((vehicle_cost-(delivery_fees*4))*0.9)/4) as quart_cost,
quart_cost/quart_dababa as unit_cost


from main_cte mc 
join finance.all_cogs f on f.product_id = mc.product_id and CURRENT_TIMEstamp between from_date and to_date 

'''

print("Fetching wholesale cost data (quarter truck calculations)...")
ws_data = snowflake_query("Egypt", query)
ws_data.columns = ws_data.columns.str.lower()

for col in ws_data.columns:
    ws_data[col] = pd.to_numeric(ws_data[col], errors='ignore')

# Select and rename columns
ws_data = ws_data[['warehouse_id', 'product_id', 'packing_unit_id', 'quart_dababa', 'unit_cost']]
ws_data.columns = ['warehouse_id', 'product_id', 'packing_unit_id', 'WS_tier', 'WS_wac']

print(f"✓ Calculated wholesale data for {len(ws_data)} products")

Fetching wholesale cost data (quarter truck calculations)...
✓ Calculated wholesale data for 3086 products


In [34]:
# =============================================================================
# NOTE: Forced brands/categories loading removed
# This was only used by the old rank-based wholesale logic which has been removed.
# Now using only the delivery savings-based wholesale logic.
# =============================================================================

print("✓ Skipping forced brands/categories - not needed for delivery savings logic")

✓ Skipping forced brands/categories - not needed for delivery savings logic


In [35]:
query = '''
SELECT product_id, new_pp, forecasted_date
FROM materialized_views.DBDP_PRICE_UPS
WHERE region = 'Cairo'
'''

print("Fetching price-up forecasts...")
price_ups = snowflake_query("Egypt", query)
price_ups.columns = price_ups.columns.str.lower()

for col in price_ups.columns:
    price_ups[col] = pd.to_numeric(price_ups[col], errors='ignore')

print(f"✓ Retrieved {len(price_ups)} price-up forecasts")

Fetching price-up forecasts...
✓ Retrieved 205 price-up forecasts


In [36]:
# Merge wholesale data and price-ups with final data
final_data = final_data.merge(ws_data, on=['warehouse_id', 'product_id', 'packing_unit_id'], how='left')
final_data['WS_wac'] = final_data['WS_wac'] * final_data['basic_unit_count']
final_data = final_data.merge(price_ups, on='product_id', how='left')

print(f"✓ Added wholesale and price-up data to {len(final_data)} products")

✓ Added wholesale and price-up data to 3086 products


In [37]:
# =============================================================================
# NOTE: Old wholesale logic (rank-based margin tiers) has been removed.
# Only using the delivery savings-based wholesale logic (see next cells).
# =============================================================================
print("✓ Using delivery savings-based wholesale logic only")

✓ Using delivery savings-based wholesale logic only


In [38]:
# =============================================================================
# OLD WHOLESALE LOGIC REMOVED
# Wholesale pricing now uses only the delivery savings-based logic below
# =============================================================================
print("✓ Skipping old wholesale logic - using delivery savings logic only")

✓ Skipping old wholesale logic - using delivery savings logic only


### 6.2 Wholesale NEW Logic (Delivery Savings Based)

New wholesale pricing based on delivery cost savings:
- **Car cost**: 1400 EGP per delivery
- **Car capacity**: 1.8 tons max
- **Max ticket size**: 30,000 EGP
- **Logic**: If retailer orders multiples of average ticket size, they save deliveries
  - 2x avg TS = 1 delivery saved → discount = delivery cost savings
  - 3x avg TS = 2 deliveries saved → more discount
- **Wholesale margin constraint**: margin >= 25% of current margin
- **Goal**: Find optimal quantity that gives retailer max savings while maintaining minimum margin


In [39]:
# =============================================================================
# WHOLESALE NEW LOGIC - Configuration
# =============================================================================
WS_CAR_COST = 1400           # Cost per delivery (EGP)
WS_CAR_CAPACITY_TONS = 1.8  # Max car capacity in tons
WS_MAX_TICKET_SIZE = 35000  # Maximum ticket size (EGP)
WS_MIN_MARGIN = 0.015        # Minimum margin (1%) above WAC

# Query to get average ticket size per warehouse
query = f'''
WITH base AS (
    SELECT *, ROW_NUMBER() OVER (PARTITION BY retailer_id ORDER BY priority) as rnk 
    FROM (
        SELECT x.*, TAGGABLE_ID as retailer_id 
        FROM (
            SELECT id as cohort_id, name as cohort_name, priority, dynamic_tag_id 
            FROM cohorts 
            WHERE is_active = 'true'
                AND id IN (700,701,702,703,704,1123,1124,1125,1126)
        ) x 
        JOIN DYNAMIC_TAGgables dt ON x.dynamic_tag_id = dt.dynamic_tag_id
        WHERE dt.taggable_id not IN (
            SELECT taggable_id FROM DYNAMIC_TAGgables 
            WHERE dynamic_tag_id IN (2807, 2808, 2809, 2810, 2811, 2812)
        )
    )
    QUALIFY rnk = 1 
),

-- Map regions to warehouses
whs AS (
    SELECT * FROM (VALUES
        ('Cairo', 'El-Marg', 38),
        ('Cairo', 'Mostorod', 1),
        ('Giza', 'Barageel', 236),
        ('Giza', 'Sakkarah', 962),
        ('Delta West', 'El-Mahala', 337),
        ('Delta West', 'Tanta', 8),
        ('Delta East', 'Mansoura FC', 339),
        ('Delta East', 'Sharqya', 170),
        ('Upper Egypt', 'Assiut FC', 501),
        ('Upper Egypt', 'Bani sweif', 401),
        ('Upper Egypt', 'Menya Samalot', 703),
        ('Upper Egypt', 'Sohag', 632),
        ('Alexandria', 'Khorshed Alex', 797)
    ) x(region_name, wh, warehouse_id)
),

-- Get ticket sizes (order values) for last 4 months
ticket_sizes AS (
    SELECT 
        whs.warehouse_id,
        whs.wh as warehouse_name,
        so.parent_sales_order_id,
        so.retailer_id,
        SUM(pso.total_price) as ticket_size,
        SUM(pso.purchased_item_count * pup.weight / 1000) as order_weight_kg
    FROM product_sales_order pso
    JOIN sales_orders so ON so.id = pso.sales_order_id
    JOIN base ON base.retailer_id = so.retailer_id
    JOIN packing_unit_products pup ON pup.product_id = pso.product_id 
        AND pup.packing_unit_id = pso.packing_unit_id
    JOIN materialized_views.retailer_polygon rp ON rp.retailer_id = so.retailer_id
    JOIN districts ON districts.id = rp.district_id
    JOIN cities ON cities.id = districts.city_id
    JOIN states ON states.id = cities.state_id
    JOIN regions ON regions.id = states.region_id
    JOIN whs ON whs.region_name = CASE WHEN regions.id = 2 THEN states.name_en ELSE regions.name_en END
    WHERE so.created_at::date BETWEEN DATE_TRUNC('month', CURRENT_DATE - INTERVAL '4 months') AND CURRENT_DATE - 1
        AND so.sales_order_status_id NOT IN (7, 12)
        AND so.channel IN ('telesales', 'retailer')
        AND pso.purchased_item_count > 0
    GROUP BY whs.warehouse_id, whs.wh, so.parent_sales_order_id, so.retailer_id
),

-- Calculate warehouse-level statistics
warehouse_stats AS (
    SELECT 
        warehouse_id,
        warehouse_name,
        COUNT(DISTINCT parent_sales_order_id) as total_orders,
        COUNT(DISTINCT retailer_id) as total_retailers,
        AVG(ticket_size) as avg_ticket_size,
        MEDIAN(ticket_size) as median_ticket_size,
        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY ticket_size) as p75_ticket_size,
        PERCENTILE_CONT(0.90) WITHIN GROUP (ORDER BY ticket_size) as p90_ticket_size,
        MAX(ticket_size) as max_ticket_size,
        AVG(order_weight_kg) as avg_order_weight_kg,
        MEDIAN(order_weight_kg) as median_order_weight_kg
    FROM ticket_sizes
    WHERE ticket_size > 0
    GROUP BY warehouse_id, warehouse_name
)

SELECT 
    warehouse_id,
    warehouse_name,
    total_orders,
    total_retailers,
    ROUND(avg_ticket_size, 2) as avg_ticket_size,
    ROUND(median_ticket_size, 2) as median_ticket_size,
    ROUND(p75_ticket_size, 2) as p75_ticket_size,
    ROUND(p90_ticket_size, 2) as p90_ticket_size,
    ROUND(max_ticket_size, 2) as max_ticket_size,
    ROUND(avg_order_weight_kg, 2) as avg_order_weight_kg,
    ROUND(median_order_weight_kg, 2) as median_order_weight_kg,
    -- Calculate how many orders fit in one car based on weight
    ROUND({WS_CAR_CAPACITY_TONS * 1000} / NULLIF(avg_order_weight_kg, 0), 1) as orders_per_car_by_weight
FROM warehouse_stats
ORDER BY warehouse_id
'''

ws_ticket_data = snowflake_query("Egypt", query)
ws_ticket_data.columns = ws_ticket_data.columns.str.lower()
for col in ws_ticket_data.columns:
    ws_ticket_data[col] = pd.to_numeric(ws_ticket_data[col], errors='ignore')

print("=== WAREHOUSE TICKET SIZE STATISTICS ===")
print(ws_ticket_data[['warehouse_name', 'avg_ticket_size', 'median_ticket_size', 'avg_order_weight_kg']].to_string(index=False))
print(f"\nOverall average ticket size: {ws_ticket_data['avg_ticket_size'].mean():.2f} EGP")


=== WAREHOUSE TICKET SIZE STATISTICS ===
warehouse_name  avg_ticket_size  median_ticket_size  avg_order_weight_kg
      Mostorod          5265.42             3640.50               120.99
         Tanta          3573.23             2403.00                87.53
       El-Marg          5265.42             3640.50               120.99
       Sharqya          3720.38             2580.25                85.96
      Barageel          5397.95             3666.00               113.57
     El-Mahala          3573.23             2403.00                87.53
   Mansoura FC          3720.38             2580.25                85.96
    Bani sweif          4304.67             2566.00               114.68
     Assiut FC          4304.67             2566.00               114.68
         Sohag          4304.67             2566.00               114.68
 Menya Samalot          4304.67             2566.00               114.68
 Khorshed Alex          3798.80             2287.75                98.01
      Sakk

In [40]:
# Merge ticket size data with final_data (including orders_per_car_by_weight)
final_data = final_data.merge(
    ws_ticket_data[['warehouse_id', 'avg_ticket_size', 'median_ticket_size', 'avg_order_weight_kg', 'orders_per_car_by_weight']], 
    on='warehouse_id', 
    how='left'
)

def calculate_ws_new_logic(row):
    """
    Calculate wholesale pricing based on delivery savings.
    
    Logic:
    - Car cost = 900 EGP, but car serves multiple orders per trip
    - Car cost per order = 900 / orders_per_car
    - If retailer consolidates, they save N orders worth of car cost
    - Savings = deliveries_saved * (car_cost / orders_per_car)
    - Calculate scenarios from 2x to max_multiplier (capped by max TS)
    
    Returns: Dict with optimal scenario
    """
    
    # Get values
    current_price = row['packing_unit_price']
    wac = row['wac_p']
    avg_ts = row.get('avg_ticket_size', 4000)  # Default 4000 if missing
    tier_2_price = row['tier_2_price']
    brand = row['brand']
    
    # Get orders per car (how many orders fit in one car trip based on weight)
    orders_per_car = row.get('orders_per_car_by_weight', 15)  # Default 10 if missing
    if pd.isna(orders_per_car) or orders_per_car <= 0:
        orders_per_car = 15
    
    # Calculate car cost per order
    car_cost_per_order = WS_CAR_COST / orders_per_car
    
    if pd.isna(avg_ts) or avg_ts <= 0:
        avg_ts = 4000
    
    if pd.isna(current_price) or pd.isna(wac) or current_price <= 0 or wac <= 0 or pd.isna(tier_2_price):
        return pd.Series({
            'ws_new_multiplier': None,
            'ws_new_order_value': None,
            'ws_new_qty': None,
            'ws_new_deliveries_saved': None,
            'ws_new_car_cost_per_order': None,
            'ws_new_total_savings': None,
            'ws_new_discount_per_unit': None,
            'ws_new_price': None,
            'ws_new_margin': None,
            'ws_new_savings_pct': None
        })
    
    # ==========================================================================
    # MARGIN-BASED MINIMUM PRICE FOR WHOLESALE (NEW CONSTRAINT)
    # Wholesale: minimum margin = 25% of current margin
    # ==========================================================================
    current_margin = (current_price - wac) / current_price
    min_ws_margin = 0.4 * current_margin
    # Calculate minimum WS price: price = wac / (1 - margin)
    min_ws_price_margin_based = wac / (1 - min_ws_margin) if min_ws_margin < 1 else current_price
    
    # Also keep legacy minimum (WAC + WS_MIN_MARGIN)
    min_ws_price_legacy = wac / (1 - WS_MIN_MARGIN)
    
    # Use the HIGHER of the two constraints
    min_acceptable_price = max(min_ws_price_margin_based, min_ws_price_legacy)
    
    # Calculate max multiplier based on constraints
    # Max by ticket size: WS_MAX_TICKET_SIZE / avg_ts
    # No arbitrary cap - let WS_MAX_TICKET_SIZE (50K) be the only limit
    max_multiplier = int(WS_MAX_TICKET_SIZE / avg_ts)
    
    best_scenario = None
    best_savings_pct = 0
    
    # Test scenarios from 2x to max_multiplier
    for multiplier in range(3, int(orders_per_car) + 1):#max_multiplier
        # Order value at this multiplier
        order_value = avg_ts * multiplier
        
        # Deliveries saved = multiplier - 1 (consolidating multiple orders into one)
        deliveries_saved = multiplier - 1
        
        # Total savings = deliveries_saved * car_cost_per_order
        # This is the actual cost saving from consolidating orders
        total_savings = (deliveries_saved * car_cost_per_order)
        
        # How many units of this SKU fit in this order value?
        qty_at_current_price = order_value / current_price
        
        if qty_at_current_price <= 0:
            continue
        
        # Discount per unit from car cost savings
        discount_per_unit = total_savings / qty_at_current_price
        
        # New price after passing car cost savings
        new_price = current_price - discount_per_unit
        
        # Check if price stays above minimum (25% of current margin OR legacy min)
        if new_price >= min_acceptable_price and order_value <=WS_MAX_TICKET_SIZE and new_price < tier_2_price:
            # Calculate margin at new price
            margin = (new_price - wac) / new_price
            
            # Savings percentage for retailer
            savings_pct = (discount_per_unit / current_price) * 100
            
            # Keep track of best scenario (highest savings while valid)
            if savings_pct > best_savings_pct:
                best_savings_pct = savings_pct
                best_scenario = {
                    'ws_new_multiplier': multiplier,
                    'ws_new_order_value': round(order_value, 2),
                    'ws_new_qty': round(qty_at_current_price, 0),
                    'ws_new_deliveries_saved': deliveries_saved,
                    'ws_new_car_cost_per_order': round(car_cost_per_order, 2),
                    'ws_new_total_savings': round(total_savings, 2),
                    'ws_new_discount_per_unit': round(discount_per_unit, 2),
                    'ws_new_price': round(new_price, 2),
                    'ws_new_margin': round(margin, 4),
                    'ws_new_savings_pct': round(savings_pct, 2)
                }
    
    if best_scenario:
        return pd.Series(best_scenario)
    else:
        return pd.Series({
            'ws_new_multiplier': None,
            'ws_new_order_value': None,
            'ws_new_qty': None,
            'ws_new_deliveries_saved': None,
            'ws_new_car_cost_per_order': None,
            'ws_new_total_savings': None,
            'ws_new_discount_per_unit': None,
            'ws_new_price': None,
            'ws_new_margin': None,
            'ws_new_savings_pct': None
        })

# Apply the new wholesale logic
print("Calculating new wholesale logic based on delivery savings...")
ws_new_results = final_data.apply(calculate_ws_new_logic, axis=1)
final_data = pd.concat([final_data, ws_new_results], axis=1)

# Summary statistics
valid_ws_new = final_data['ws_new_price'].notna().sum()
print(f"\n=== NEW WHOLESALE LOGIC SUMMARY ===")
print(f"SKUs with valid WS new price: {valid_ws_new} / {len(final_data)}")
print(f"Total car cost: {WS_CAR_COST} EGP")
print(f"Average orders per car: {final_data['orders_per_car_by_weight'].mean():.1f}")
print(f"Average car cost per order: {WS_CAR_COST / final_data['orders_per_car_by_weight'].mean():.2f} EGP")

if valid_ws_new > 0:
    print(f"\nOrder Consolidation:")
    print(f"  Average multiplier: {final_data['ws_new_multiplier'].mean():.1f}x of avg ticket size")
    print(f"  Average order value needed: {final_data['ws_new_order_value'].mean():.2f} EGP")
    print(f"  Average deliveries saved: {final_data['ws_new_deliveries_saved'].mean():.1f}")
    
    print(f"\nCar Cost Savings:")
    print(f"  Average car cost per order: {final_data['ws_new_car_cost_per_order'].mean():.2f} EGP")
    print(f"  Average total savings: {final_data['ws_new_total_savings'].mean():.2f} EGP")
    print(f"  Average discount per unit: {final_data['ws_new_discount_per_unit'].mean():.2f} EGP")
    
    print(f"\nPricing:")
    print(f"  Average WS new price margin: {final_data['ws_new_margin'].mean()*100:.2f}%")
    print(f"  Average retailer savings: {final_data['ws_new_savings_pct'].mean():.2f}%")
    
    # Distribution of multipliers
    print(f"\nMultiplier distribution:")
    print(final_data['ws_new_multiplier'].value_counts().sort_index())


Calculating new wholesale logic based on delivery savings...

=== NEW WHOLESALE LOGIC SUMMARY ===
SKUs with valid WS new price: 1128 / 3086
Total car cost: 1400 EGP
Average orders per car: 17.6
Average car cost per order: 79.61 EGP

Order Consolidation:
  Average multiplier: 7.8x of avg ticket size
  Average order value needed: 32324.96 EGP
  Average deliveries saved: 6.8

Car Cost Savings:
  Average car cost per order: 80.73 EGP
  Average total savings: 538.74 EGP
  Average discount per unit: 4.74 EGP

Pricing:
  Average WS new price margin: 3.79%
  Average retailer savings: 1.65%

Multiplier distribution:
ws_new_multiplier
3.0     27
4.0     16
5.0     18
6.0    232
7.0      4
8.0    364
9.0    467
Name: count, dtype: int64


In [41]:
# =============================================================================
# APPLY T3 (WHOLESALE) FEEDBACK ADJUSTMENTS
# =============================================================================

if len(feedback_t3_data) > 0:
    print("\n" + "="*80)
    print("APPLYING T3 (WHOLESALE) FEEDBACK ADJUSTMENTS")
    print("="*80)
    
    # Merge T3 feedback with final_data
    final_data = final_data.merge(
        feedback_t3_data,
        on=['warehouse_id', 'product_id', 'packing_unit_id'],
        how='left'
    )
    
    # Calculate current WS discount percentage for comparison
    final_data['current_ws_discount'] = (
        (final_data['packing_unit_price'] - final_data['ws_new_price']) / 
        final_data['packing_unit_price'] * 100
    ).round(2)
    
    # ==========================================================================
    # SMART T3 QUANTITY ADJUSTMENTS
    # Only apply if recommendation actually improves the current value
    # ==========================================================================
    mask_t3 = (final_data['suggested_t3_qty'].notna() & final_data['ws_new_qty'].notna())
    t3_qty_applied = 0
    t3_qty_skipped_already_better = 0
    t3_qty_capped = 0
    
    if mask_t3.sum() > 0:
        original_t3_qty = final_data.loc[mask_t3, 'ws_new_qty'].copy()
        suggested_t3_qty = final_data.loc[mask_t3, 'suggested_t3_qty']
        t3_action = final_data.loc[mask_t3, 't3_action'].fillna('')
        
        # Check if script already achieves the recommendation goal
        # Skip if: action is INCREASE and script already >= suggested
        # Skip if: action is DECREASE and script already <= suggested
        skip_t3_increase = t3_action.str.contains('INCREASE_QTY', case=False, na=False) & (original_t3_qty >= suggested_t3_qty)
        skip_t3_decrease = t3_action.str.contains('DECREASE_QTY', case=False, na=False) & (original_t3_qty <= suggested_t3_qty)
        skip_t3_already_better = skip_t3_increase | skip_t3_decrease
        
        t3_qty_skipped_already_better = skip_t3_already_better.sum()
        if t3_qty_skipped_already_better > 0:
            print(f"  ⚠ Skipped T3 qty for {t3_qty_skipped_already_better} SKUs (script already better)")
        
        # For remaining SKUs, apply with capping logic
        apply_t3_mask = mask_t3.copy()
        apply_t3_mask.loc[mask_t3] = ~skip_t3_already_better.values
        
        if apply_t3_mask.sum() > 0:
            # Calculate percentage change for those we'll process
            pct_change = (final_data.loc[apply_t3_mask, 'suggested_t3_qty'] - 
                          final_data.loc[apply_t3_mask, 'ws_new_qty']) / final_data.loc[apply_t3_mask, 'ws_new_qty']
            
            # Handle extreme reductions (suggested < 30% of original, i.e., >70% reduction)
            mask_extreme = pct_change < -0.70
            mask_extreme_full = apply_t3_mask.copy()
            mask_extreme_full.loc[apply_t3_mask] = mask_extreme.values
            
            if mask_extreme_full.sum() > 0:
                final_data.loc[mask_extreme_full, 'ws_new_qty'] = (
                    final_data.loc[mask_extreme_full, 'ws_new_qty'] * 0.9
                ).astype(int)
                print(f"  ⚠ Capped {mask_extreme_full.sum()} SKUs with extreme reduction (applied -15% instead)")
                t3_qty_capped += mask_extreme_full.sum()
            
            # Handle large decreases (-30% to -70%)
            mask_large_dec = (pct_change < -0.15) & (pct_change >= -0.70)
            mask_large_dec_full = apply_t3_mask.copy()
            mask_large_dec_full.loc[apply_t3_mask] = mask_large_dec.values
            
            if mask_large_dec_full.sum() > 0:
                final_data.loc[mask_large_dec_full, 'ws_new_qty'] = (
                    final_data.loc[mask_large_dec_full, 'ws_new_qty'] * 0.95
                ).astype(int)
                print(f"  ⚠ Capped {mask_large_dec_full.sum()} SKUs with large decrease (applied -15% instead)")
                t3_qty_capped += mask_large_dec_full.sum()
            
            # Handle large increases (> +30%)
            mask_large_inc = pct_change > 0.15
            mask_large_inc_full = apply_t3_mask.copy()
            mask_large_inc_full.loc[apply_t3_mask] = mask_large_inc.values
            
            if mask_large_inc_full.sum() > 0:
                final_data.loc[mask_large_inc_full, 'ws_new_qty'] = (
                    final_data.loc[mask_large_inc_full, 'ws_new_qty'] * 1.15
                ).astype(int)
                print(f"  ⚠ Capped {mask_large_inc_full.sum()} SKUs with large increase (applied +15% instead)")
                t3_qty_capped += mask_large_inc_full.sum()
            
            # Handle normal changes (within ±30%)
            mask_normal = (pct_change >= -0.15) & (pct_change <= 0.15)
            mask_normal_full = apply_t3_mask.copy()
            mask_normal_full.loc[apply_t3_mask] = mask_normal.values
            
            if mask_normal_full.sum() > 0:
                orig = final_data.loc[mask_normal_full, 'ws_new_qty'].copy()
                final_data.loc[mask_normal_full, 'ws_new_qty'] = final_data.loc[mask_normal_full, 'suggested_t3_qty'].astype(int)
                avg_change = ((final_data.loc[mask_normal_full, 'ws_new_qty'] - orig) / orig * 100).mean()
                t3_qty_applied = mask_normal_full.sum()
                print(f"  ✓ Applied T3 qty adjustments: {t3_qty_applied} SKUs (avg change: {avg_change:+.1f}%)")
    
    # ==========================================================================
    # SMART T3 DISCOUNT ADJUSTMENTS
    # Only apply if recommendation actually improves the current value
    # ==========================================================================
    mask_t3_disc = (final_data['suggested_t3_discount'].notna() & 
                    final_data['ws_new_price'].notna() &
                    final_data['ws_new_qty'].notna())
    t3_disc_applied = 0
    t3_disc_skipped = 0
    
    if mask_t3_disc.sum() > 0:
        original_ws_disc = final_data.loc[mask_t3_disc, 'current_ws_discount'].copy()
        suggested_ws_disc = final_data.loc[mask_t3_disc, 'suggested_t3_discount']
        t3_action = final_data.loc[mask_t3_disc, 't3_action'].fillna('')
        
        # Skip if: action contains INCREASE_DISCOUNT and script already >= suggested
        # Skip if: action contains DECREASE/REDUCE DISCOUNT and script already <= suggested
        skip_disc_increase = t3_action.str.contains('INCREASE_DISCOUNT', case=False, na=False) & (original_ws_disc >= suggested_ws_disc)
        skip_disc_decrease = t3_action.str.contains('DECREASE_DISCOUNT|REDUCE_DISCOUNT', case=False, na=False) & (original_ws_disc <= suggested_ws_disc)
        skip_disc = skip_disc_increase | skip_disc_decrease
        
        # Apply only where NOT skipped
        apply_disc = mask_t3_disc.copy()
        apply_disc.loc[mask_t3_disc] = ~skip_disc.values
        
        if apply_disc.sum() > 0:
            final_data.loc[apply_disc, 'ws_new_price'] = (
                final_data.loc[apply_disc, 'packing_unit_price'] * 
                (1 - (final_data.loc[apply_disc, 'suggested_t3_discount'] / 100))
            ).round(2)
            t3_disc_applied = apply_disc.sum()
            print(f"  ✓ Applied T3 discount adjustments: {t3_disc_applied} SKUs")
        
        t3_disc_skipped = skip_disc.sum()
        if t3_disc_skipped > 0:
            print(f"  ⚠ Skipped T3 discount for {t3_disc_skipped} SKUs (script already better)")
    
    # Drop temporary column
    final_data = final_data.drop(columns=['current_ws_discount'], errors='ignore')
    
    # ==========================================================================
    # VALIDATE T3 CONSTRAINTS: T2 qty < T3 qty AND T2 discount < T3 discount
    # ==========================================================================
    print("\n  Validating T3 constraints...")
    
    # Constraint 1: T3 qty must be > T2 qty
    invalid_t3_qty = (final_data['ws_new_qty'].notna() & 
                      (final_data['ws_new_qty'] <= final_data['tier_2_qty']))
    if invalid_t3_qty.sum() > 0:
        # Fix: Set T3 qty = T2 qty * 1.5
        final_data.loc[invalid_t3_qty, 'ws_new_qty'] = (
            final_data.loc[invalid_t3_qty, 'tier_2_qty'] * 10
        ).astype(int)
        print(f"    Fixed {invalid_t3_qty.sum()} SKUs where T3 qty <= T2 qty")
    
    # Constraint 2: T3 discount must be > T2 discount (T3 price < T2 price)
    # Calculate T3 discount percentage
    final_data['ws_discount_pct'] = (
        (final_data['packing_unit_price'] - final_data['ws_new_price']) / 
        final_data['packing_unit_price'] * 100
    ).round(2)
    
    invalid_t3_disc = (final_data['ws_new_price'].notna() & 
                       (final_data['ws_discount_pct'] <= final_data['discount_2_pct']))
    if invalid_t3_disc.sum() > 0:
        # Fix: Set T3 discount = T2 discount + 0.5
        new_t3_disc = final_data.loc[invalid_t3_disc, 'discount_2_pct'] + 0.5
        final_data.loc[invalid_t3_disc, 'ws_discount_pct'] = new_t3_disc
        final_data.loc[invalid_t3_disc, 'ws_new_price'] = (
            final_data.loc[invalid_t3_disc, 'packing_unit_price'] * 
            (1 - (new_t3_disc / 100))
        ).round(2)
        print(f"    Fixed {invalid_t3_disc.sum()} SKUs where T3 discount <= T2 discount")
    
    # Summary by T3 action
    if 't3_action' in final_data.columns:
        t3_actions = final_data[final_data['t3_action'].notna()]['t3_action'].value_counts()
        print(f"\n  T3 Actions applied:")
        for action, count in t3_actions.items():
            if action != 'NO_CHANGE':
                print(f"    - {action}: {count}")
    
    # Drop T3 feedback columns
    cols_to_drop = ['suggested_t3_qty', 'suggested_t3_discount', 't3_action']
    final_data = final_data.drop(columns=[c for c in cols_to_drop if c in final_data.columns], errors='ignore')
    
    print("\n" + "="*80)
else:
    print("⚠ No T3 (wholesale) feedback adjustments applied")
    # Still create ws_discount_pct column for later use
    if 'ws_new_price' in final_data.columns:
        final_data['ws_discount_pct'] = (
            (final_data['packing_unit_price'] - final_data['ws_new_price']) / 
            final_data['packing_unit_price'] * 100
        ).round(2)


APPLYING T3 (WHOLESALE) FEEDBACK ADJUSTMENTS
  ⚠ Skipped T3 qty for 7 SKUs (script already better)
  ⚠ Capped 36 SKUs with large decrease (applied -15% instead)
  ✓ Applied T3 qty adjustments: 266 SKUs (avg change: -9.9%)
  ✓ Applied T3 discount adjustments: 305 SKUs
  ⚠ Skipped T3 discount for 4 SKUs (script already better)

  Validating T3 constraints...
    Fixed 3 SKUs where T3 discount <= T2 discount

  T3 Actions applied:
    - DECREASE_QTY_OR_INCREASE_DISCOUNT: 365
    - INCREASE_QTY_OR_REDUCE_DISCOUNT: 2



In [42]:
# =============================================================================
# FINAL MARGIN CONSTRAINT ENFORCEMENT
# =============================================================================
# This ensures margin constraints are ALWAYS enforced, even after feedback adjustments
#
# Constraints:
# - Tier 1: margin >= 60% of current margin
# - Tier 2: margin >= 40% of current margin  
# - Wholesale: margin >= 25% of current margin
# =============================================================================

print("\n" + "="*80)
print("ENFORCING MARGIN CONSTRAINTS (FINAL VALIDATION)")
print("="*80)

# Calculate current margin for all rows
final_data['current_margin'] = (final_data['packing_unit_price'] - final_data['wac_p']) / final_data['packing_unit_price']

# ==========================================================================
# TIER 1 MARGIN CONSTRAINT: >= 60% of current margin
# ==========================================================================
final_data['tier_1_margin_actual'] = (final_data['tier_1_price'] - final_data['wac_p']) / final_data['tier_1_price']
final_data['min_t1_margin_required'] = 0.60 * final_data['current_margin']
final_data['min_t1_price_required'] = final_data['wac_p'] / (1 - final_data['min_t1_margin_required'])

# Find violations
t1_violations = (final_data['tier_1_price'].notna() & 
                 (final_data['tier_1_margin_actual'] < final_data['min_t1_margin_required']))

if t1_violations.sum() > 0:
    # Fix: Set tier_1_price to minimum required
    final_data.loc[t1_violations, 'tier_1_price'] = final_data.loc[t1_violations, 'min_t1_price_required'].round(2)
    # Update discount percentage
    final_data.loc[t1_violations, 'discount_1'] = (
        final_data.loc[t1_violations, 'packing_unit_price'] - final_data.loc[t1_violations, 'tier_1_price']
    ).round(2)
    final_data.loc[t1_violations, 'discount_1_pct'] = (
        final_data.loc[t1_violations, 'discount_1'] / final_data.loc[t1_violations, 'packing_unit_price'] * 100
    ).round(2)
    print(f"  ✓ Fixed {t1_violations.sum()} SKUs with T1 margin < 60% of current margin")
else:
    print(f"  ✓ All T1 prices meet 60% margin constraint")

# ==========================================================================
# TIER 2 MARGIN CONSTRAINT: >= 40% of current margin
# ==========================================================================
final_data['tier_2_margin_actual'] = (final_data['tier_2_price'] - final_data['wac_p']) / final_data['tier_2_price']
final_data['min_t2_margin_required'] = 0.40 * final_data['current_margin']
final_data['min_t2_price_required'] = final_data['wac_p'] / (1 - final_data['min_t2_margin_required'])

# Find violations
t2_violations = (final_data['tier_2_price'].notna() & 
                 (final_data['tier_2_margin_actual'] < final_data['min_t2_margin_required']))

if t2_violations.sum() > 0:
    # Fix: Set tier_2_price to minimum required
    final_data.loc[t2_violations, 'tier_2_price'] = final_data.loc[t2_violations, 'min_t2_price_required'].round(2)
    # Update discount percentage
    final_data.loc[t2_violations, 'discount_2'] = (
        final_data.loc[t2_violations, 'packing_unit_price'] - final_data.loc[t2_violations, 'tier_2_price']
    ).round(2)
    final_data.loc[t2_violations, 'discount_2_pct'] = (
        final_data.loc[t2_violations, 'discount_2'] / final_data.loc[t2_violations, 'packing_unit_price'] * 100
    ).round(2)
    print(f"  ✓ Fixed {t2_violations.sum()} SKUs with T2 margin < 40% of current margin")
else:
    print(f"  ✓ All T2 prices meet 40% margin constraint")

# ==========================================================================
# WHOLESALE MARGIN CONSTRAINT: >= 25% of current margin
# ==========================================================================
final_data['ws_margin_actual'] = (final_data['ws_new_price'] - final_data['wac_p']) / final_data['ws_new_price']
final_data['min_ws_margin_required'] = 0.25 * final_data['current_margin']
final_data['min_ws_price_required'] = final_data['wac_p'] / (1 - final_data['min_ws_margin_required'])

# Find violations
ws_violations = (final_data['ws_new_price'].notna() & 
                 (final_data['ws_margin_actual'] < final_data['min_ws_margin_required']))

if ws_violations.sum() > 0:
    # Fix: Set ws_new_price to minimum required
    final_data.loc[ws_violations, 'ws_new_price'] = final_data.loc[ws_violations, 'min_ws_price_required'].round(2)
    # Update discount percentage
    final_data.loc[ws_violations, 'ws_discount_pct'] = (
        (final_data.loc[ws_violations, 'packing_unit_price'] - final_data.loc[ws_violations, 'ws_new_price']) / 
        final_data.loc[ws_violations, 'packing_unit_price'] * 100
    ).round(2)
    print(f"  ✓ Fixed {ws_violations.sum()} SKUs with WS margin < 25% of current margin")
else:
    print(f"  ✓ All WS prices meet 25% margin constraint")

# ==========================================================================
# RE-VALIDATE TIER ORDERING AFTER MARGIN FIXES
# ==========================================================================
# Ensure: T2 price < T1 price (T2 discount > T1 discount)
ordering_violations = (final_data['tier_2_price'].notna() & 
                       final_data['tier_1_price'].notna() &
                       (final_data['tier_2_price'] >= final_data['tier_1_price']))

if ordering_violations.sum() > 0:
    # Invalidate both tiers for these SKUs (margin constraints conflict with ordering)
    final_data.loc[ordering_violations, 'tier_1_price'] = np.nan
    final_data.loc[ordering_violations, 'tier_2_price'] = np.nan
    final_data.loc[ordering_violations, 'discount_1'] = np.nan
    final_data.loc[ordering_violations, 'discount_2'] = np.nan
    final_data.loc[ordering_violations, 'discount_1_pct'] = np.nan
    final_data.loc[ordering_violations, 'discount_2_pct'] = np.nan
    print(f"  ⚠ Invalidated {ordering_violations.sum()} SKUs where T2 price >= T1 price after margin fix")

# Ensure: WS price < T2 price (WS discount > T2 discount)
ws_ordering_violations = (final_data['ws_new_price'].notna() & 
                          final_data['tier_2_price'].notna() &
                          (final_data['ws_new_price'] >= final_data['tier_2_price']))

if ws_ordering_violations.sum() > 0:
    # Invalidate WS for these SKUs
    final_data.loc[ws_ordering_violations, 'ws_new_price'] = np.nan
    final_data.loc[ws_ordering_violations, 'ws_new_qty'] = np.nan
    final_data.loc[ws_ordering_violations, 'ws_discount_pct'] = np.nan
    print(f"  ⚠ Invalidated {ws_ordering_violations.sum()} SKUs where WS price >= T2 price after margin fix")

# Drop temporary columns
temp_cols = ['min_t1_margin_required', 'min_t1_price_required', 'tier_1_margin_actual',
             'min_t2_margin_required', 'min_t2_price_required', 'tier_2_margin_actual',
             'min_ws_margin_required', 'min_ws_price_required', 'ws_margin_actual']
final_data = final_data.drop(columns=[c for c in temp_cols if c in final_data.columns], errors='ignore')

# ==========================================================================
# FINAL SUMMARY
# ==========================================================================
print("\n" + "-"*80)
print("MARGIN CONSTRAINT SUMMARY")
print("-"*80)

# Calculate final margins
final_data['tier_1_margin_final'] = (final_data['tier_1_price'] - final_data['wac_p']) / final_data['tier_1_price']
final_data['tier_2_margin_final'] = (final_data['tier_2_price'] - final_data['wac_p']) / final_data['tier_2_price']
final_data['ws_margin_final'] = (final_data['ws_new_price'] - final_data['wac_p']) / final_data['ws_new_price']

# Calculate margin retention percentages
final_data['t1_margin_retention'] = (final_data['tier_1_margin_final'] / final_data['current_margin'] * 100).round(1)
final_data['t2_margin_retention'] = (final_data['tier_2_margin_final'] / final_data['current_margin'] * 100).round(1)
final_data['ws_margin_retention'] = (final_data['ws_margin_final'] / final_data['current_margin'] * 100).round(1)

valid_t1 = final_data['tier_1_price'].notna()
valid_t2 = final_data['tier_2_price'].notna()
valid_ws = final_data['ws_new_price'].notna()

print(f"\nCurrent Margin:      {final_data['current_margin'].mean()*100:.2f}% (average)")
print(f"\nTier 1 (min 60%):")
print(f"  - Valid SKUs:      {valid_t1.sum()}")
print(f"  - Avg margin:      {final_data.loc[valid_t1, 'tier_1_margin_final'].mean()*100:.2f}%")
print(f"  - Avg retention:   {final_data.loc[valid_t1, 't1_margin_retention'].mean():.1f}%")
print(f"  - Min retention:   {final_data.loc[valid_t1, 't1_margin_retention'].min():.1f}%")

print(f"\nTier 2 (min 40%):")
print(f"  - Valid SKUs:      {valid_t2.sum()}")
print(f"  - Avg margin:      {final_data.loc[valid_t2, 'tier_2_margin_final'].mean()*100:.2f}%")
print(f"  - Avg retention:   {final_data.loc[valid_t2, 't2_margin_retention'].mean():.1f}%")
print(f"  - Min retention:   {final_data.loc[valid_t2, 't2_margin_retention'].min():.1f}%")

print(f"\nWholesale (min 25%):")
print(f"  - Valid SKUs:      {valid_ws.sum()}")
print(f"  - Avg margin:      {final_data.loc[valid_ws, 'ws_margin_final'].mean()*100:.2f}%")
print(f"  - Avg retention:   {final_data.loc[valid_ws, 'ws_margin_retention'].mean():.1f}%")
print(f"  - Min retention:   {final_data.loc[valid_ws, 'ws_margin_retention'].min():.1f}%")

# Drop temporary columns
temp_cols = ['tier_1_margin_final', 'tier_2_margin_final', 'ws_margin_final',
             't1_margin_retention', 't2_margin_retention', 'ws_margin_retention']
final_data = final_data.drop(columns=[c for c in temp_cols if c in final_data.columns], errors='ignore')

print("\n" + "="*80)



ENFORCING MARGIN CONSTRAINTS (FINAL VALIDATION)
  ✓ Fixed 3 SKUs with T1 margin < 60% of current margin
  ✓ Fixed 71 SKUs with T2 margin < 40% of current margin
  ✓ Fixed 7 SKUs with WS margin < 25% of current margin

--------------------------------------------------------------------------------
MARGIN CONSTRAINT SUMMARY
--------------------------------------------------------------------------------

Current Margin:      7.72% (average)

Tier 1 (min 60%):
  - Valid SKUs:      3086
  - Avg margin:      6.83%
  - Avg retention:   88.0%
  - Min retention:   60.0%

Tier 2 (min 40%):
  - Valid SKUs:      3086
  - Avg margin:      5.56%
  - Avg retention:   71.0%
  - Min retention:   38.9%

Wholesale (min 25%):
  - Valid SKUs:      1128
  - Avg margin:      3.69%
  - Avg retention:   64.9%
  - Min retention:   25.0%



In [43]:
final_data['t1_f'] = final_data['tier_1_qty'].notna().astype(int)
final_data['t2_f'] = final_data['tier_2_qty'].notna().astype(int)
final_data['ws_f'] = final_data['ws_new_qty'].notna().astype(int)
final_data['all_f'] = final_data['t1_f']+final_data['t2_f']+final_data['ws_f']
final_data=final_data.sort_values(by =['warehouse_id','final_rank'],ascending = [True,True] )
final_data['cumsum'] = final_data.groupby('warehouse_id')['all_f'].cumsum()
final_data = final_data[final_data['cumsum'] <= 400]
print(f"✓ Filtered to top {FINAL_PRODUCTS_PER_WAREHOUSE} products per warehouse: {len(final_data)} total SKUs")

✓ Filtered to top 133 products per warehouse: 2040 total SKUs


In [44]:
# =============================================================================
# CALCULATE ADDITIONAL METRICS
# =============================================================================

# --- Stretch Percentages (how much retailers need to increase to reach each tier) ---
# Already included from tiers_selection: tier_1_increase_pct, tier_2_increase_pct
# These show: (tier_qty - median_qty) / median_qty * 100

# Rename for clarity
final_data['stretch_to_tier_1_pct'] = final_data['tier_1_increase_pct']
final_data['stretch_to_tier_2_pct'] = final_data['tier_2_increase_pct']

# --- Margins for each price tier ---
# Margin = (price - wac) / price
final_data['tier_1_margin'] = ((final_data['tier_1_price'] - final_data['wac_p']) / final_data['tier_1_price']).round(4)
final_data['tier_2_margin'] = ((final_data['tier_2_price'] - final_data['wac_p']) / final_data['tier_2_price']).round(4)
final_data['WS_margin'] = ((final_data['ws_new_price'] - final_data['wac_p']) / final_data['wac_p']).round(4)
final_data['current_margin'] = ((final_data['packing_unit_price'] - final_data['wac_p']) / final_data['packing_unit_price']).round(4)

# --- Discount calculations ---
# Absolute discounts (price reduction from current price)
final_data['discount_1'] = (final_data['packing_unit_price'] - final_data['tier_1_price']).round(2)
final_data['discount_2'] = (final_data['packing_unit_price'] - final_data['tier_2_price']).round(2)

# Discount percentages
final_data['discount_1_pct'] = ((final_data['discount_1'] / final_data['packing_unit_price']) * 100).round(2)
final_data['discount_2_pct'] = ((final_data['discount_2'] / final_data['packing_unit_price']) * 100).round(2)

# --- Quantity and Discount Ratios ---
# Quantity ratio (tier_2_qty / tier_1_qty)
final_data['qty_ratio'] = (final_data['tier_2_qty'] / final_data['tier_1_qty']).round(2)

# Discount ratio (discount_2 / discount_1)
final_data['discount_ratio'] = (final_data['discount_2'] / final_data['discount_1']).round(2)

# Elasticity ratio = discount_ratio / qty_ratio
# This shows how much extra discount per unit of quantity increase
final_data['elasticity_ratio'] = (final_data['discount_ratio'] / final_data['qty_ratio']).round(2)

print("=== METRICS SUMMARY ===")
print(f"\nStretch Analysis (how much retailers need to increase orders):")
print(f"  Average stretch to Tier 1: {final_data['stretch_to_tier_1_pct'].mean():.1f}%")
print(f"  Average stretch to Tier 2: {final_data['stretch_to_tier_2_pct'].mean():.1f}%")

print(f"\nMargin Analysis:")
print(f"  Current margin:  {final_data['current_margin'].mean()*100:.2f}%")
print(f"  Tier 1 margin:   {final_data['tier_1_margin'].mean()*100:.2f}%")
print(f"  Tier 2 margin:   {final_data['tier_2_margin'].mean()*100:.2f}%")
print(f"  WS margin:       {final_data['WS_margin'].mean()*100:.2f}%")

print(f"\nDiscount Analysis:")
print(f"  Average Tier 1 discount: {final_data['discount_1_pct'].mean():.2f}%")
print(f"  Average Tier 2 discount: {final_data['discount_2_pct'].mean():.2f}%")

print(f"\nElasticity Analysis (discount increase vs quantity increase):")
print(f"  Average qty ratio (T2/T1): {final_data['qty_ratio'].mean():.2f}x")
print(f"  Average discount ratio (D2/D1): {final_data['discount_ratio'].mean():.2f}x")
print(f"  Average elasticity ratio: {final_data['elasticity_ratio'].mean():.2f}")


=== METRICS SUMMARY ===

Stretch Analysis (how much retailers need to increase orders):
  Average stretch to Tier 1: 247.6%
  Average stretch to Tier 2: 513.4%

Margin Analysis:
  Current margin:  7.82%
  Tier 1 margin:   6.92%
  Tier 2 margin:   5.58%
  WS margin:       3.84%

Discount Analysis:
  Average Tier 1 discount: 0.97%
  Average Tier 2 discount: 2.39%

Elasticity Analysis (discount increase vs quantity increase):
  Average qty ratio (T2/T1): 1.81x
  Average discount ratio (D2/D1): 2.93x
  Average elasticity ratio: 1.65


## 7. Conversion Scenarios & Simulation

Before uploading, analyze expected blended prices and margins:
1. **Hypothetical Scenarios** - 10 different conversion rate assumptions
2. **Historical Simulation** - Actual tier conversion from previous month data


In [45]:
# =============================================================================
# PART 1: HYPOTHETICAL CONVERSION SCENARIOS
# =============================================================================
# 10 scenarios with different conversion rates:
# - Base (no tier): % of orders at packing_unit_price
# - Tier 1: % of orders at tier_1_price
# - Tier 2: % of orders at tier_2_price
# - Wholesale: % of orders at ws_new_price

# Define 10 scenarios: (base%, tier1%, tier2%, ws%)
# Scenarios range from pessimistic (low conversion) to optimistic (high conversion)
scenarios = {
    'Scenario 1 - Very Low Conversion':   (90, 7, 2, 1),    # Most orders at base price
    'Scenario 2 - Low Conversion':        (80, 12, 5, 3),   # Low tier uptake
    'Scenario 3 - Conservative':          (70, 15, 10, 5),  # Conservative estimate
    'Scenario 4 - Moderate Low':          (65, 18, 12, 5),  # Slightly better
    'Scenario 5 - Moderate':              (60, 20, 13, 7),  # Moderate adoption
    'Scenario 6 - Moderate High':         (55, 22, 15, 8),  # Good adoption
    'Scenario 7 - Optimistic':            (50, 25, 17, 8),  # Optimistic uptake
    'Scenario 8 - High Conversion':       (45, 27, 18, 10), # High tier adoption
    'Scenario 9 - Very High Conversion':  (40, 28, 20, 12), # Very high uptake
    'Scenario 10 - Maximum Conversion':   (35, 30, 22, 13), # Maximum realistic conversion
    'Scenario 10 - Current Conversion':   (75, 8, 14, 12), # Maximum realistic conversion
}

def calculate_blended_metrics_with_gp(df, base_pct, t1_pct, t2_pct, ws_pct, num_orders=1000):
    """
    Calculate blended price, margin, NMV and Gross Profit for a given conversion scenario.
    
    Key insight: When retailers convert to tiers, they buy MORE quantity (that's the incentive).
    - Base orders: quantity = median_qty (typical order before tier)
    - Tier 1 orders: quantity = tier_1_qty (must reach this to get discount)
    - Tier 2 orders: quantity = tier_2_qty
    - WS orders: quantity = ws_new_qty
    
    Args:
        df: DataFrame with price and quantity columns
        base_pct, t1_pct, t2_pct, ws_pct: % of orders in each tier
        num_orders: Assumed number of total orders for simulation
    
    Returns:
        dict with all metrics
    """
    # Convert percentages to decimals
    base_pct = base_pct / 100
    t1_pct = t1_pct / 100
    t2_pct = t2_pct / 100
    ws_pct = ws_pct / 100
    
    df_calc = df.copy()
    
    # Fill missing values
    df_calc['ws_price_filled'] = df_calc['ws_new_price'].fillna(df_calc['tier_2_price'])
    df_calc['ws_qty_filled'] = df_calc['ws_new_qty'].fillna(df_calc['tier_2_qty'])
    
    # Calculate NMV and COGS for each tier (per SKU, per order)
    # Base: orders at median_qty * packing_unit_price
    df_calc['base_nmv_per_order'] = df_calc['median_qty'] * df_calc['packing_unit_price']
    df_calc['base_cogs_per_order'] = df_calc['median_qty'] * df_calc['wac_p']
    
    # Tier 1: orders at tier_1_qty * tier_1_price
    df_calc['t1_nmv_per_order'] = df_calc['tier_1_qty'] * df_calc['tier_1_price']
    df_calc['t1_cogs_per_order'] = df_calc['tier_1_qty'] * df_calc['wac_p']
    
    # Tier 2: orders at tier_2_qty * tier_2_price
    df_calc['t2_nmv_per_order'] = df_calc['tier_2_qty'] * df_calc['tier_2_price']
    df_calc['t2_cogs_per_order'] = df_calc['tier_2_qty'] * df_calc['wac_p']
    
    # Wholesale: orders at ws_qty * ws_price
    df_calc['ws_nmv_per_order'] = df_calc['ws_qty_filled'] * df_calc['ws_price_filled']
    df_calc['ws_cogs_per_order'] = df_calc['ws_qty_filled'] * df_calc['wac_p']
    
    # Blended NMV per order (weighted by conversion rates)
    df_calc['blended_nmv_per_order'] = (
        base_pct * df_calc['base_nmv_per_order'] +
        t1_pct * df_calc['t1_nmv_per_order'] +
        t2_pct * df_calc['t2_nmv_per_order'] +
        ws_pct * df_calc['ws_nmv_per_order']
    )
    
    # Blended COGS per order
    df_calc['blended_cogs_per_order'] = (
        base_pct * df_calc['base_cogs_per_order'] +
        t1_pct * df_calc['t1_cogs_per_order'] +
        t2_pct * df_calc['t2_cogs_per_order'] +
        ws_pct * df_calc['ws_cogs_per_order']
    )
    
    # Blended quantity per order
    df_calc['blended_qty_per_order'] = (
        base_pct * df_calc['median_qty'] +
        t1_pct * df_calc['tier_1_qty'] +
        t2_pct * df_calc['tier_2_qty'] +
        ws_pct * df_calc['ws_qty_filled']
    )
    
    # Gross Profit per order
    df_calc['blended_gp_per_order'] = df_calc['blended_nmv_per_order'] - df_calc['blended_cogs_per_order']
    
    # Current state (100% base)
    df_calc['current_nmv_per_order'] = df_calc['base_nmv_per_order']
    df_calc['current_cogs_per_order'] = df_calc['base_cogs_per_order']
    df_calc['current_gp_per_order'] = df_calc['current_nmv_per_order'] - df_calc['current_cogs_per_order']
    
    # Aggregate across all SKUs (simulate num_orders distributed across SKUs)
    orders_per_sku = num_orders / len(df_calc)
    
    total_current_nmv = (df_calc['current_nmv_per_order'] * orders_per_sku).sum()
    total_current_cogs = (df_calc['current_cogs_per_order'] * orders_per_sku).sum()
    total_current_gp = total_current_nmv - total_current_cogs
    total_current_qty = (df_calc['median_qty'] * orders_per_sku).sum()
    
    total_blended_nmv = (df_calc['blended_nmv_per_order'] * orders_per_sku).sum()
    total_blended_cogs = (df_calc['blended_cogs_per_order'] * orders_per_sku).sum()
    total_blended_gp = total_blended_nmv - total_blended_cogs
    total_blended_qty = (df_calc['blended_qty_per_order'] * orders_per_sku).sum()
    
    # Calculate changes
    nmv_change = total_blended_nmv - total_current_nmv
    nmv_change_pct = (nmv_change / total_current_nmv) * 100
    
    qty_change = total_blended_qty - total_current_qty
    qty_change_pct = (qty_change / total_current_qty) * 100
    
    gp_change = total_blended_gp - total_current_gp
    gp_change_pct = (gp_change / total_current_gp) * 100 if total_current_gp != 0 else 0
    
    # Blended margins
    current_margin = total_current_gp / total_current_nmv if total_current_nmv != 0 else 0
    blended_margin = total_blended_gp / total_blended_nmv if total_blended_nmv != 0 else 0
    
    # Average blended price per unit
    avg_current_price = total_current_nmv / total_current_qty if total_current_qty != 0 else 0
    avg_blended_price = total_blended_nmv / total_blended_qty if total_blended_qty != 0 else 0
    
    return {
        'current_nmv': total_current_nmv,
        'blended_nmv': total_blended_nmv,
        'nmv_change': nmv_change,
        'nmv_change_pct': nmv_change_pct,
        'current_qty': total_current_qty,
        'blended_qty': total_blended_qty,
        'qty_change': qty_change,
        'qty_change_pct': qty_change_pct,
        'current_gp': total_current_gp,
        'blended_gp': total_blended_gp,
        'gp_change': gp_change,
        'gp_change_pct': gp_change_pct,
        'current_margin': current_margin,
        'blended_margin': blended_margin,
        'avg_current_price': avg_current_price,
        'avg_blended_price': avg_blended_price
    }

# Calculate and display results for each scenario
print("=" * 120)
print("CONVERSION SCENARIOS ANALYSIS - NMV, Quantity & Gross Profit Impact")
print("=" * 120)

# Assume 1000 orders for simulation
NUM_ORDERS = 1000

print(f"\nSimulation based on {NUM_ORDERS:,} orders distributed across {len(final_data)} SKUs")
print(f"\n{'Scenario':<35} {'Base':>5} {'T1':>4} {'T2':>4} {'WS':>4} | {'NMV Δ%':>8} {'Qty Δ%':>8} {'GP Δ%':>8} | {'Blnd Margin':>11} {'GP (EGP)':>12}")
print("-" * 120)

scenario_results = []
for scenario_name, (base, t1, t2, ws) in scenarios.items():
    metrics = calculate_blended_metrics_with_gp(final_data, base, t1, t2, ws, NUM_ORDERS)
    
    scenario_results.append({
        'scenario': scenario_name,
        'base_pct': base,
        't1_pct': t1,
        't2_pct': t2,
        'ws_pct': ws,
        **metrics
    })
    
    print(f"{scenario_name:<35} {base:>4}% {t1:>3}% {t2:>3}% {ws:>3}% | "
          f"{metrics['nmv_change_pct']:>+7.1f}% {metrics['qty_change_pct']:>+7.1f}% {metrics['gp_change_pct']:>+7.1f}% | "
          f"{metrics['blended_margin']*100:>10.2f}% {metrics['blended_gp']:>11,.0f}")

# Create summary DataFrame
scenarios_df = pd.DataFrame(scenario_results)

# Current state baseline
current_metrics = calculate_blended_metrics_with_gp(final_data, 100, 0, 0, 0, NUM_ORDERS)

print("\n" + "=" * 120)
print("DETAILED COMPARISON")
print("=" * 120)

print(f"\n{'CURRENT STATE (100% Base Price)':^60}")
print("-" * 60)
print(f"  Total NMV:           {current_metrics['current_nmv']:>15,.2f} EGP")
print(f"  Total Quantity:      {current_metrics['current_qty']:>15,.0f} units")
print(f"  Total COGS:          {current_metrics['current_nmv'] - current_metrics['current_gp']:>15,.2f} EGP")
print(f"  Total Gross Profit:  {current_metrics['current_gp']:>15,.2f} EGP")
print(f"  Gross Margin:        {current_metrics['current_margin']*100:>15.2f}%")

# Conservative scenario
cons = scenarios_df[scenarios_df['scenario'].str.contains('Conservative')].iloc[0]
print(f"\n{'CONSERVATIVE SCENARIO (70/15/10/5)':^60}")
print("-" * 60)
print(f"  Total NMV:           {cons['blended_nmv']:>15,.2f} EGP  ({cons['nmv_change_pct']:>+.1f}%)")
print(f"  Total Quantity:      {cons['blended_qty']:>15,.0f} units ({cons['qty_change_pct']:>+.1f}%)")
print(f"  Total Gross Profit:  {cons['blended_gp']:>15,.2f} EGP  ({cons['gp_change_pct']:>+.1f}%)")
print(f"  Gross Margin:        {cons['blended_margin']*100:>15.2f}%")
print(f"  GP Change:           {cons['gp_change']:>+15,.2f} EGP")

# Optimistic scenario
opt = scenarios_df[scenarios_df['scenario'].str.contains('Optimistic')].iloc[0]
print(f"\n{'OPTIMISTIC SCENARIO (50/25/17/8)':^60}")
print("-" * 60)
print(f"  Total NMV:           {opt['blended_nmv']:>15,.2f} EGP  ({opt['nmv_change_pct']:>+.1f}%)")
print(f"  Total Quantity:      {opt['blended_qty']:>15,.0f} units ({opt['qty_change_pct']:>+.1f}%)")
print(f"  Total Gross Profit:  {opt['blended_gp']:>15,.2f} EGP  ({opt['gp_change_pct']:>+.1f}%)")
print(f"  Gross Margin:        {opt['blended_margin']*100:>15.2f}%")
print(f"  GP Change:           {opt['gp_change']:>+15,.2f} EGP")

# Summary table
print("\n" + "=" * 120)
print("SCENARIO IMPACT SUMMARY")
print("=" * 120)
print(f"\n{'Scenario':<35} | {'NMV Change':>15} | {'Qty Change':>15} | {'GP Change':>15} | {'GP Margin':>10}")
print("-" * 100)
for _, row in scenarios_df.iterrows():
    print(f"{row['scenario']:<35} | {row['nmv_change']:>+14,.0f} | {row['qty_change']:>+14,.0f} | {row['gp_change']:>+14,.0f} | {row['blended_margin']*100:>9.2f}%")


CONVERSION SCENARIOS ANALYSIS - NMV, Quantity & Gross Profit Impact

Simulation based on 1,000 orders distributed across 2040 SKUs

Scenario                             Base   T1   T2   WS |   NMV Δ%   Qty Δ%    GP Δ% | Blnd Margin     GP (EGP)
------------------------------------------------------------------------------------------------------------------------
Scenario 1 - Very Low Conversion      90%   7%   2%   1% |   +71.3%   +84.2%   +45.2% |       5.93%      28,422
Scenario 2 - Low Conversion           80%  12%   5%   3% |  +183.6%  +227.5%  +111.6% |       5.22%      41,429
Scenario 3 - Conservative             70%  15%  10%   5% |  +301.8%  +376.0%  +180.8% |       4.89%      54,973
Scenario 4 - Moderate Low             65%  18%  12%   5% |  +321.3%  +392.3%  +194.8% |       4.90%      57,714
Scenario 5 - Moderate                 60%  20%  13%   7% |  +414.1%  +519.3%  +247.2% |       4.73%      67,979
Scenario 6 - Moderate High            55%  22%  15%   8% |  +471.7%  +592.

In [46]:
# =============================================================================
# PART 2: HISTORICAL SIMULATION - Previous Month Tier Conversion
# =============================================================================
# Query actual order data from previous month to see real tier conversion rates
# Then calculate what the blended price and margin would have been

# Prepare product list for query
selected_df = final_data[['warehouse_id', 'product_id', 'packing_unit_id', 
                           'tier_1_qty', 'tier_2_qty', 'ws_new_qty',
                           'packing_unit_price', 'tier_1_price', 'tier_2_price', 
                           'ws_new_price', 'wac_p']].copy()

# Create tuples string for SQL
tuples_for_query = ','.join([
    f"({int(row['warehouse_id'])}, {int(row['product_id'])}, {int(row['packing_unit_id'])}, "
    f"{int(row['tier_1_qty'])}, {int(row['tier_2_qty'])}, {int(row['ws_new_qty']) if pd.notna(row['ws_new_qty']) else 0})"
    for _, row in selected_df.iterrows()
])

query = f'''
WITH selected_products AS (
    SELECT warehouse_id, product_id, packing_unit_id, tier_1_qty, tier_2_qty, ws_qty
    FROM (VALUES
        {tuples_for_query}
    ) AS x(warehouse_id, product_id, packing_unit_id, tier_1_qty, tier_2_qty, ws_qty)
),

-- Same base filtering as product selection
base AS (
    SELECT *, ROW_NUMBER() OVER (PARTITION BY retailer_id ORDER BY priority) as rnk 
    FROM (
        SELECT x.*, TAGGABLE_ID as retailer_id 
        FROM (
            SELECT id as cohort_id, name as cohort_name, priority, dynamic_tag_id 
            FROM cohorts 
            WHERE is_active = 'true'
                AND id IN (700,701,702,703,704,1123,1124,1125,1126)
        ) x 
        JOIN DYNAMIC_TAGgables dt ON x.dynamic_tag_id = dt.dynamic_tag_id
        WHERE dt.taggable_id NOT IN (
            SELECT taggable_id FROM DYNAMIC_TAGgables 
            WHERE dynamic_tag_id IN (2807, 2808, 2809, 2810, 2811, 2812)
        )
    )
    QUALIFY rnk = 1 
),

-- Map regions to warehouses
whs AS (
    SELECT * FROM (VALUES
        ('Cairo', 'El-Marg', 38),
        ('Cairo', 'Mostorod', 1),
        ('Giza', 'Barageel', 236),
        ('Giza', 'Sakkarah', 962),
        ('Delta West', 'El-Mahala', 337),
        ('Delta West', 'Tanta', 8),
        ('Delta East', 'Mansoura FC', 339),
        ('Delta East', 'Sharqya', 170),
        ('Upper Egypt', 'Assiut FC', 501),
        ('Upper Egypt', 'Bani sweif', 401),
        ('Upper Egypt', 'Menya Samalot', 703),
        ('Upper Egypt', 'Sohag', 632),
        ('Alexandria', 'Khorshed Alex', 797)
    ) x(region_name, wh, warehouse_id)
),

-- Get order quantities from previous month
previous_month_orders AS (
    SELECT 
        whs.warehouse_id,
        pso.product_id,
        pso.packing_unit_id,
        so.parent_sales_order_id,
        so.retailer_id,
        SUM(pso.purchased_item_count) as order_qty,
        SUM(pso.total_price) as order_value
    FROM product_sales_order pso
    JOIN sales_orders so ON so.id = pso.sales_order_id
    JOIN base ON base.retailer_id = so.retailer_id
    JOIN materialized_views.retailer_polygon rp ON rp.retailer_id = so.retailer_id
    JOIN districts ON districts.id = rp.district_id
    JOIN cities ON cities.id = districts.city_id
    JOIN states ON states.id = cities.state_id
    JOIN regions ON regions.id = states.region_id
    JOIN whs ON whs.region_name = CASE WHEN regions.id = 2 THEN states.name_en ELSE regions.name_en END
    JOIN selected_products sp 
        ON sp.warehouse_id = whs.warehouse_id 
        AND sp.product_id = pso.product_id
        AND sp.packing_unit_id = pso.packing_unit_id
    WHERE so.created_at::date BETWEEN current_date - 31 and current_Date - 1 
                                   
        AND so.sales_order_status_id NOT IN (7, 12)
        AND so.channel IN ('telesales', 'retailer')
        AND pso.purchased_item_count > 0
    GROUP BY whs.warehouse_id, pso.product_id, pso.packing_unit_id, 
             so.parent_sales_order_id, so.retailer_id
),

-- Classify each order into tiers based on quantity
order_tiers AS (
    SELECT 
        pmo.*,
        sp.tier_1_qty,
        sp.tier_2_qty,
        sp.ws_qty,
        CASE 
            WHEN pmo.order_qty >= sp.ws_qty AND sp.ws_qty > 0 THEN 'Wholesale'
            WHEN pmo.order_qty >= sp.tier_2_qty THEN 'Tier 2'
            WHEN pmo.order_qty >= sp.tier_1_qty THEN 'Tier 1'
            ELSE 'Base'
        END as tier_reached
    FROM previous_month_orders pmo
    JOIN selected_products sp 
        ON sp.warehouse_id = pmo.warehouse_id 
        AND sp.product_id = pmo.product_id
        AND sp.packing_unit_id = pmo.packing_unit_id
),

-- Aggregate conversion rates per SKU
sku_conversion AS (
    SELECT 
        warehouse_id,
        product_id,
        packing_unit_id,
        COUNT(*) as total_orders,
        SUM(order_value) as total_value,
        
        -- Order counts by tier
        COUNT(CASE WHEN tier_reached = 'Base' THEN 1 END) as base_orders,
        COUNT(CASE WHEN tier_reached = 'Tier 1' THEN 1 END) as tier1_orders,
        COUNT(CASE WHEN tier_reached = 'Tier 2' THEN 1 END) as tier2_orders,
        COUNT(CASE WHEN tier_reached = 'Wholesale' THEN 1 END) as ws_orders,
        
        -- Conversion percentages
        ROUND(100.0 * COUNT(CASE WHEN tier_reached = 'Base' THEN 1 END) / NULLIF(COUNT(*), 0), 2) as base_pct,
        ROUND(100.0 * COUNT(CASE WHEN tier_reached = 'Tier 1' THEN 1 END) / NULLIF(COUNT(*), 0), 2) as tier1_pct,
        ROUND(100.0 * COUNT(CASE WHEN tier_reached = 'Tier 2' THEN 1 END) / NULLIF(COUNT(*), 0), 2) as tier2_pct,
        ROUND(100.0 * COUNT(CASE WHEN tier_reached = 'Wholesale' THEN 1 END) / NULLIF(COUNT(*), 0), 2) as ws_pct
        
    FROM order_tiers
    GROUP BY warehouse_id, product_id, packing_unit_id
),

-- Overall conversion rates
overall_conversion AS (
    SELECT 
        'Overall' as level,
        COUNT(*) as total_orders,
        SUM(order_value) as total_value,
        ROUND(100.0 * COUNT(CASE WHEN tier_reached = 'Base' THEN 1 END) / NULLIF(COUNT(*), 0), 2) as base_pct,
        ROUND(100.0 * COUNT(CASE WHEN tier_reached = 'Tier 1' THEN 1 END) / NULLIF(COUNT(*), 0), 2) as tier1_pct,
        ROUND(100.0 * COUNT(CASE WHEN tier_reached = 'Tier 2' THEN 1 END) / NULLIF(COUNT(*), 0), 2) as tier2_pct,
        ROUND(100.0 * COUNT(CASE WHEN tier_reached = 'Wholesale' THEN 1 END) / NULLIF(COUNT(*), 0), 2) as ws_pct
    FROM order_tiers
)

-- Return both SKU-level and overall results
SELECT 
    sc.warehouse_id,
    sc.product_id,
    sc.packing_unit_id,
    sc.total_orders,
    sc.total_value,
    sc.base_orders,
    sc.tier1_orders,
    sc.tier2_orders,
    sc.ws_orders,
    sc.base_pct,
    sc.tier1_pct,
    sc.tier2_pct,
    sc.ws_pct
FROM sku_conversion sc
ORDER BY sc.warehouse_id, sc.total_orders DESC
'''

print("Fetching historical tier conversion data from previous month...")
historical_conversion = snowflake_query("Egypt", query)
historical_conversion.columns = historical_conversion.columns.str.lower()

for col in historical_conversion.columns:
    historical_conversion[col] = pd.to_numeric(historical_conversion[col], errors='ignore')

print(f"✓ Retrieved conversion data for {len(historical_conversion)} SKUs")
print(f"  Total orders analyzed: {historical_conversion['total_orders'].sum():,}")

# Calculate overall conversion rates
total_orders = historical_conversion['total_orders'].sum()
overall_base_pct = (historical_conversion['base_orders'].sum() / total_orders) * 100
overall_t1_pct = (historical_conversion['tier1_orders'].sum() / total_orders) * 100
overall_t2_pct = (historical_conversion['tier2_orders'].sum() / total_orders) * 100
overall_ws_pct = (historical_conversion['ws_orders'].sum() / total_orders) * 100

print("\n" + "=" * 80)
print("HISTORICAL TIER CONVERSION (Previous Month)")
print("=" * 80)
print(f"\nOverall Conversion Rates (based on {total_orders:,} orders):")
print(f"  Base (no tier):  {overall_base_pct:>6.2f}%  ({historical_conversion['base_orders'].sum():,} orders)")
print(f"  Tier 1:          {overall_t1_pct:>6.2f}%  ({historical_conversion['tier1_orders'].sum():,} orders)")
print(f"  Tier 2:          {overall_t2_pct:>6.2f}%  ({historical_conversion['tier2_orders'].sum():,} orders)")
print(f"  Wholesale:       {overall_ws_pct:>6.2f}%  ({historical_conversion['ws_orders'].sum():,} orders)")


Fetching historical tier conversion data from previous month...
✓ Retrieved conversion data for 2040 SKUs
  Total orders analyzed: 486,667

HISTORICAL TIER CONVERSION (Previous Month)

Overall Conversion Rates (based on 486,667 orders):
  Base (no tier):   87.36%  (425,167 orders)
  Tier 1:            8.25%  (40,141 orders)
  Tier 2:            4.37%  (21,249 orders)
  Wholesale:         0.02%  (110 orders)


In [47]:
# =============================================================================
# PART 2B: CALCULATE BLENDED NMV, GROSS PROFIT FROM HISTORICAL CONVERSION
# =============================================================================

# Merge historical conversion data with pricing and quantity data
simulation_data = historical_conversion.merge(
    final_data[['warehouse_id', 'product_id', 'packing_unit_id',
                'packing_unit_price', 'tier_1_price', 'tier_2_price', 
                'ws_new_price', 'wac_p', 'sku', 'brand',
                'median_qty', 'tier_1_qty', 'tier_2_qty', 'ws_new_qty']],
    on=['warehouse_id', 'product_id', 'packing_unit_id'],
    how='left'
)

# Fill missing values
simulation_data['ws_price_filled'] = simulation_data['ws_new_price'].fillna(simulation_data['tier_2_price'])
simulation_data['ws_qty_filled'] = simulation_data['ws_new_qty'].fillna(simulation_data['tier_2_qty'])

# =============================================================================
# Calculate NMV & GP per order for each tier
# =============================================================================

# Current state: All orders at base price with median quantity
simulation_data['current_qty_per_order'] = simulation_data['median_qty']
simulation_data['current_nmv_per_order'] = simulation_data['median_qty'] * simulation_data['packing_unit_price']
simulation_data['current_cogs_per_order'] = simulation_data['median_qty'] * simulation_data['wac_p']
simulation_data['current_gp_per_order'] = simulation_data['current_nmv_per_order'] - simulation_data['current_cogs_per_order']

# Blended state: Orders distributed across tiers with corresponding quantities
# Base orders: median_qty at packing_unit_price
# T1 orders: tier_1_qty at tier_1_price
# T2 orders: tier_2_qty at tier_2_price
# WS orders: ws_qty at ws_price

simulation_data['blended_qty_per_order'] = (
    (simulation_data['base_pct'] / 100) * simulation_data['median_qty'] +
    (simulation_data['tier1_pct'] / 100) * simulation_data['tier_1_qty'] +
    (simulation_data['tier2_pct'] / 100) * simulation_data['tier_2_qty'] +
    (simulation_data['ws_pct'] / 100) * simulation_data['ws_qty_filled']
)

simulation_data['blended_nmv_per_order'] = (
    (simulation_data['base_pct'] / 100) * simulation_data['median_qty'] * simulation_data['packing_unit_price'] +
    (simulation_data['tier1_pct'] / 100) * simulation_data['tier_1_qty'] * simulation_data['tier_1_price'] +
    (simulation_data['tier2_pct'] / 100) * simulation_data['tier_2_qty'] * simulation_data['tier_2_price'] +
    (simulation_data['ws_pct'] / 100) * simulation_data['ws_qty_filled'] * simulation_data['ws_price_filled']
)

simulation_data['blended_cogs_per_order'] = (
    (simulation_data['base_pct'] / 100) * simulation_data['median_qty'] * simulation_data['wac_p'] +
    (simulation_data['tier1_pct'] / 100) * simulation_data['tier_1_qty'] * simulation_data['wac_p'] +
    (simulation_data['tier2_pct'] / 100) * simulation_data['tier_2_qty'] * simulation_data['wac_p'] +
    (simulation_data['ws_pct'] / 100) * simulation_data['ws_qty_filled'] * simulation_data['wac_p']
)

simulation_data['blended_gp_per_order'] = simulation_data['blended_nmv_per_order'] - simulation_data['blended_cogs_per_order']

# Calculate totals using actual order counts
simulation_data['total_current_nmv'] = simulation_data['current_nmv_per_order'] * simulation_data['total_orders']
simulation_data['total_current_cogs'] = simulation_data['current_cogs_per_order'] * simulation_data['total_orders']
simulation_data['total_current_gp'] = simulation_data['total_current_nmv'] - simulation_data['total_current_cogs']
simulation_data['total_current_qty'] = simulation_data['current_qty_per_order'] * simulation_data['total_orders']

simulation_data['total_blended_nmv'] = simulation_data['blended_nmv_per_order'] * simulation_data['total_orders']
simulation_data['total_blended_cogs'] = simulation_data['blended_cogs_per_order'] * simulation_data['total_orders']
simulation_data['total_blended_gp'] = simulation_data['total_blended_nmv'] - simulation_data['total_blended_cogs']
simulation_data['total_blended_qty'] = simulation_data['blended_qty_per_order'] * simulation_data['total_orders']

# Calculate changes
simulation_data['nmv_change'] = simulation_data['total_blended_nmv'] - simulation_data['total_current_nmv']
simulation_data['qty_change'] = simulation_data['total_blended_qty'] - simulation_data['total_current_qty']
simulation_data['gp_change'] = simulation_data['total_blended_gp'] - simulation_data['total_current_gp']

# Margins
simulation_data['current_margin'] = simulation_data['total_current_gp'] / simulation_data['total_current_nmv']
simulation_data['blended_margin'] = simulation_data['total_blended_gp'] / simulation_data['total_blended_nmv']

# =============================================================================
# Summary Statistics
# =============================================================================
print("\n" + "=" * 120)
print("HISTORICAL SIMULATION - NMV, QUANTITY & GROSS PROFIT IMPACT")
print("=" * 120)

# Aggregate totals
total_orders = simulation_data['total_orders'].sum()
total_current_nmv = simulation_data['total_current_nmv'].sum()
total_blended_nmv = simulation_data['total_blended_nmv'].sum()
total_current_qty = simulation_data['total_current_qty'].sum()
total_blended_qty = simulation_data['total_blended_qty'].sum()
total_current_gp = simulation_data['total_current_gp'].sum()
total_blended_gp = simulation_data['total_blended_gp'].sum()

nmv_change = total_blended_nmv - total_current_nmv
qty_change = total_blended_qty - total_current_qty
gp_change = total_blended_gp - total_current_gp

print(f"\nBased on {total_orders:,} historical orders across {len(simulation_data)} SKUs")
print(f"Conversion: Base {overall_base_pct:.1f}% | T1 {overall_t1_pct:.1f}% | T2 {overall_t2_pct:.1f}% | WS {overall_ws_pct:.1f}%")

print(f"\n{'METRIC':<25} {'CURRENT':>18} {'BLENDED':>18} {'CHANGE':>18} {'CHANGE %':>12}")
print("-" * 95)
print(f"{'Total NMV (EGP)':<25} {total_current_nmv:>17,.0f} {total_blended_nmv:>17,.0f} {nmv_change:>+17,.0f} {(nmv_change/total_current_nmv)*100:>+11.2f}%")
print(f"{'Total Quantity (units)':<25} {total_current_qty:>17,.0f} {total_blended_qty:>17,.0f} {qty_change:>+17,.0f} {(qty_change/total_current_qty)*100:>+11.2f}%")
print(f"{'Total COGS (EGP)':<25} {total_current_nmv-total_current_gp:>17,.0f} {total_blended_nmv-total_blended_gp:>17,.0f} {(total_blended_nmv-total_blended_gp)-(total_current_nmv-total_current_gp):>+17,.0f} {((total_blended_nmv-total_blended_gp)-(total_current_nmv-total_current_gp))/(total_current_nmv-total_current_gp)*100:>+11.2f}%")
print(f"{'Total Gross Profit (EGP)':<25} {total_current_gp:>17,.0f} {total_blended_gp:>17,.0f} {gp_change:>+17,.0f} {(gp_change/total_current_gp)*100:>+11.2f}%")
print(f"{'Gross Margin (%)':<25} {(total_current_gp/total_current_nmv)*100:>17.2f}% {(total_blended_gp/total_blended_nmv)*100:>17.2f}% {((total_blended_gp/total_blended_nmv)-(total_current_gp/total_current_nmv))*100:>+17.2f} pp")

# Warehouse breakdown with GP
print(f"\n" + "=" * 120)
print("WAREHOUSE-LEVEL BREAKDOWN")
print("=" * 120)

warehouse_summary = simulation_data.groupby('warehouse_id').agg({
    'total_orders': 'sum',
    'base_orders': 'sum',
    'tier1_orders': 'sum',
    'tier2_orders': 'sum',
    'ws_orders': 'sum',
    'total_current_nmv': 'sum',
    'total_blended_nmv': 'sum',
    'total_current_gp': 'sum',
    'total_blended_gp': 'sum',
    'total_current_qty': 'sum',
    'total_blended_qty': 'sum'
}).reset_index()

warehouse_summary['base_pct'] = (warehouse_summary['base_orders'] / warehouse_summary['total_orders'] * 100).round(1)
warehouse_summary['tier1_pct'] = (warehouse_summary['tier1_orders'] / warehouse_summary['total_orders'] * 100).round(1)
warehouse_summary['tier2_pct'] = (warehouse_summary['tier2_orders'] / warehouse_summary['total_orders'] * 100).round(1)
warehouse_summary['ws_pct'] = (warehouse_summary['ws_orders'] / warehouse_summary['total_orders'] * 100).round(1)
warehouse_summary['nmv_change'] = warehouse_summary['total_blended_nmv'] - warehouse_summary['total_current_nmv']
warehouse_summary['nmv_change_pct'] = (warehouse_summary['nmv_change'] / warehouse_summary['total_current_nmv'] * 100).round(1)
warehouse_summary['gp_change'] = warehouse_summary['total_blended_gp'] - warehouse_summary['total_current_gp']
warehouse_summary['gp_change_pct'] = (warehouse_summary['gp_change'] / warehouse_summary['total_current_gp'] * 100).round(1)
warehouse_summary['current_margin'] = (warehouse_summary['total_current_gp'] / warehouse_summary['total_current_nmv'] * 100).round(2)
warehouse_summary['blended_margin'] = (warehouse_summary['total_blended_gp'] / warehouse_summary['total_blended_nmv'] * 100).round(2)

print(f"\n{'WH':>5} {'Orders':>7} {'Base%':>6} {'T1%':>5} {'T2%':>5} {'WS%':>5} | {'NMV Δ':>12} {'NMV Δ%':>8} | {'GP Δ':>12} {'GP Δ%':>8} | {'Margin':>7}")
print("-" * 110)

for _, row in warehouse_summary.iterrows():
    print(f"{int(row['warehouse_id']):>5} {int(row['total_orders']):>7} "
          f"{row['base_pct']:>5.1f}% {row['tier1_pct']:>4.1f}% {row['tier2_pct']:>4.1f}% {row['ws_pct']:>4.1f}% | "
          f"{row['nmv_change']:>+11,.0f} {row['nmv_change_pct']:>+7.1f}% | "
          f"{row['gp_change']:>+11,.0f} {row['gp_change_pct']:>+7.1f}% | "
          f"{row['blended_margin']:>6.2f}%")

# Compare historical conversion to scenarios
print("\n" + "=" * 120)
print("COMPARISON: Historical vs Hypothetical Scenarios")
print("=" * 120)

# Calculate blended metrics using historical rates
hist_metrics = calculate_blended_metrics_with_gp(
    final_data, overall_base_pct, overall_t1_pct, overall_t2_pct, overall_ws_pct, NUM_ORDERS
)

print(f"\nHistorical Conversion ({overall_base_pct:.1f}/{overall_t1_pct:.1f}/{overall_t2_pct:.1f}/{overall_ws_pct:.1f}):")
print(f"  NMV Change:      {hist_metrics['nmv_change_pct']:>+.2f}%")
print(f"  Quantity Change: {hist_metrics['qty_change_pct']:>+.2f}%")
print(f"  GP Change:       {hist_metrics['gp_change_pct']:>+.2f}%")
print(f"  Blended Margin:  {hist_metrics['blended_margin']*100:.2f}%")

# Find closest scenario
closest_scenario = None
min_diff = float('inf')
for scenario_name, (base, t1, t2, ws) in scenarios.items():
    diff = abs(base - overall_base_pct) + abs(t1 - overall_t1_pct) + abs(t2 - overall_t2_pct) + abs(ws - overall_ws_pct)
    if diff < min_diff:
        min_diff = diff
        closest_scenario = scenario_name

print(f"\nClosest Hypothetical Scenario: {closest_scenario}")

# =============================================================================
# WHAT-IF SCENARIO: Shift 30% from Base to Tiers
# =============================================================================
print("\n" + "=" * 120)
print("WHAT-IF SCENARIO: Move 30% of Base Orders to Tiers")
print("=" * 120)

# Calculate new conversion rates by shifting 30% from base to tiers
SHIFT_PCT = 20  # Percentage points to shift from base

# New base rate (reduced by 30 pp)
new_base_pct = max(overall_base_pct - SHIFT_PCT, 0)

# Distribute the shifted percentage to tiers proportionally
# Based on existing tier ratios (excluding base)
tier_total = overall_t1_pct + overall_t2_pct + overall_ws_pct

if tier_total > 0:
    # Distribute proportionally to existing tier distribution
    t1_share = overall_t1_pct / tier_total
    t2_share = overall_t2_pct / tier_total
    ws_share = overall_ws_pct / tier_total
    
    new_t1_pct = overall_t1_pct + (SHIFT_PCT * t1_share)
    new_t2_pct = overall_t2_pct + (SHIFT_PCT * t2_share)
    new_ws_pct = overall_ws_pct + (SHIFT_PCT * ws_share)
else:
    # If no tier conversion exists, split evenly
    new_t1_pct = overall_t1_pct + (SHIFT_PCT * 0.5)
    new_t2_pct = overall_t2_pct + (SHIFT_PCT * 0.3)
    new_ws_pct = overall_ws_pct + (SHIFT_PCT * 0.2)

print(f"\nConversion Rate Comparison:")
print(f"{'':>25} {'Historical':>15} {'What-If (+30%)':>15} {'Change':>12}")
print("-" * 70)
print(f"{'Base (no tier)':<25} {overall_base_pct:>14.1f}% {new_base_pct:>14.1f}% {new_base_pct - overall_base_pct:>+11.1f} pp")
print(f"{'Tier 1':<25} {overall_t1_pct:>14.1f}% {new_t1_pct:>14.1f}% {new_t1_pct - overall_t1_pct:>+11.1f} pp")
print(f"{'Tier 2':<25} {overall_t2_pct:>14.1f}% {new_t2_pct:>14.1f}% {new_t2_pct - overall_t2_pct:>+11.1f} pp")
print(f"{'Wholesale':<25} {overall_ws_pct:>14.1f}% {new_ws_pct:>14.1f}% {new_ws_pct - overall_ws_pct:>+11.1f} pp")
print(f"{'TOTAL':<25} {overall_base_pct + overall_t1_pct + overall_t2_pct + overall_ws_pct:>14.1f}% {new_base_pct + new_t1_pct + new_t2_pct + new_ws_pct:>14.1f}%")

# Calculate metrics for what-if scenario
whatif_metrics = calculate_blended_metrics_with_gp(final_data, new_base_pct, new_t1_pct, new_t2_pct, new_ws_pct, NUM_ORDERS)

# Also calculate using actual order count from historical data
whatif_simulation = simulation_data.copy()

# Recalculate with new conversion rates
whatif_simulation['whatif_nmv_per_order'] = (
    (new_base_pct / 100) * whatif_simulation['median_qty'] * whatif_simulation['packing_unit_price'] +
    (new_t1_pct / 100) * whatif_simulation['tier_1_qty'] * whatif_simulation['tier_1_price'] +
    (new_t2_pct / 100) * whatif_simulation['tier_2_qty'] * whatif_simulation['tier_2_price'] +
    (new_ws_pct / 100) * whatif_simulation['ws_qty_filled'] * whatif_simulation['ws_price_filled']
)

whatif_simulation['whatif_cogs_per_order'] = (
    (new_base_pct / 100) * whatif_simulation['median_qty'] * whatif_simulation['wac_p'] +
    (new_t1_pct / 100) * whatif_simulation['tier_1_qty'] * whatif_simulation['wac_p'] +
    (new_t2_pct / 100) * whatif_simulation['tier_2_qty'] * whatif_simulation['wac_p'] +
    (new_ws_pct / 100) * whatif_simulation['ws_qty_filled'] * whatif_simulation['wac_p']
)

whatif_simulation['whatif_qty_per_order'] = (
    (new_base_pct / 100) * whatif_simulation['median_qty'] +
    (new_t1_pct / 100) * whatif_simulation['tier_1_qty'] +
    (new_t2_pct / 100) * whatif_simulation['tier_2_qty'] +
    (new_ws_pct / 100) * whatif_simulation['ws_qty_filled']
)

# Calculate totals with actual historical order counts
whatif_simulation['total_whatif_nmv'] = whatif_simulation['whatif_nmv_per_order'] * whatif_simulation['total_orders']
whatif_simulation['total_whatif_cogs'] = whatif_simulation['whatif_cogs_per_order'] * whatif_simulation['total_orders']
whatif_simulation['total_whatif_gp'] = whatif_simulation['total_whatif_nmv'] - whatif_simulation['total_whatif_cogs']
whatif_simulation['total_whatif_qty'] = whatif_simulation['whatif_qty_per_order'] * whatif_simulation['total_orders']

# Aggregate totals
total_whatif_nmv = whatif_simulation['total_whatif_nmv'].sum()
total_whatif_qty = whatif_simulation['total_whatif_qty'].sum()
total_whatif_gp = whatif_simulation['total_whatif_gp'].sum()

# Calculate changes vs current and vs historical blended
print(f"\n{'METRIC':<25} {'CURRENT':>15} {'HISTORICAL':>15} {'WHAT-IF':>15} {'vs Current':>12} {'vs Historical':>14}")
print("-" * 110)
print(f"{'Total NMV (EGP)':<25} {total_current_nmv:>14,.0f} {total_blended_nmv:>14,.0f} {total_whatif_nmv:>14,.0f} {(total_whatif_nmv - total_current_nmv):>+11,.0f} {(total_whatif_nmv - total_blended_nmv):>+13,.0f}")
print(f"{'Total Quantity':<25} {total_current_qty:>14,.0f} {total_blended_qty:>14,.0f} {total_whatif_qty:>14,.0f} {(total_whatif_qty - total_current_qty):>+11,.0f} {(total_whatif_qty - total_blended_qty):>+13,.0f}")
print(f"{'Total Gross Profit':<25} {total_current_gp:>14,.0f} {total_blended_gp:>14,.0f} {total_whatif_gp:>14,.0f} {(total_whatif_gp - total_current_gp):>+11,.0f} {(total_whatif_gp - total_blended_gp):>+13,.0f}")

current_margin_pct = (total_current_gp / total_current_nmv) * 100
historical_margin_pct = (total_blended_gp / total_blended_nmv) * 100
whatif_margin_pct = (total_whatif_gp / total_whatif_nmv) * 100

print(f"{'Gross Margin':<25} {current_margin_pct:>13.2f}% {historical_margin_pct:>13.2f}% {whatif_margin_pct:>13.2f}% {(whatif_margin_pct - current_margin_pct):>+10.2f} pp {(whatif_margin_pct - historical_margin_pct):>+12.2f} pp")

# Summary comparison
print(f"\n{'':=^110}")
print(f"{'IMPACT SUMMARY':^110}")
print(f"{'':=^110}")

print(f"""
If we shift {SHIFT_PCT}% of base orders to tiers:

📈 vs CURRENT STATE (100% base):
   • NMV increases by:          {(total_whatif_nmv - total_current_nmv):>+15,.0f} EGP  ({((total_whatif_nmv - total_current_nmv) / total_current_nmv) * 100:>+6.2f}%)
   • Quantity increases by:     {(total_whatif_qty - total_current_qty):>+15,.0f} units ({((total_whatif_qty - total_current_qty) / total_current_qty) * 100:>+6.2f}%)
   • Gross Profit changes by:   {(total_whatif_gp - total_current_gp):>+15,.0f} EGP  ({((total_whatif_gp - total_current_gp) / total_current_gp) * 100:>+6.2f}%)
   • Margin changes:            {(whatif_margin_pct - current_margin_pct):>+15.2f} pp

📊 vs HISTORICAL CONVERSION ({overall_base_pct:.0f}/{overall_t1_pct:.0f}/{overall_t2_pct:.0f}/{overall_ws_pct:.0f}):
   • NMV additional:            {(total_whatif_nmv - total_blended_nmv):>+15,.0f} EGP  ({((total_whatif_nmv - total_blended_nmv) / total_blended_nmv) * 100:>+6.2f}%)
   • Quantity additional:       {(total_whatif_qty - total_blended_qty):>+15,.0f} units ({((total_whatif_qty - total_blended_qty) / total_blended_qty) * 100:>+6.2f}%)
   • GP additional:             {(total_whatif_gp - total_blended_gp):>+15,.0f} EGP  ({((total_whatif_gp - total_blended_gp) / total_blended_gp) * 100:>+6.2f}%)
   • Margin change:             {(whatif_margin_pct - historical_margin_pct):>+15.2f} pp
""")

# Key insight
print("=" * 120)
print("KEY INSIGHT")
print("=" * 120)
print(f"""
Based on historical conversion rates:
• NMV {'INCREASES' if nmv_change > 0 else 'DECREASES'} by {abs(nmv_change):,.0f} EGP ({(nmv_change/total_current_nmv)*100:+.1f}%)
  → This is because retailers order MORE quantity to reach tier thresholds

• Gross Profit {'INCREASES' if gp_change > 0 else 'DECREASES'} by {abs(gp_change):,.0f} EGP ({(gp_change/total_current_gp)*100:+.1f}%)
  → {'Higher volume offsets lower price per unit' if gp_change > 0 else 'Lower prices reduce GP despite higher volume'}

• Gross Margin changes from {(total_current_gp/total_current_nmv)*100:.2f}% to {(total_blended_gp/total_blended_nmv)*100:.2f}%
  → {'Margin compression' if (total_blended_gp/total_blended_nmv) < (total_current_gp/total_current_nmv) else 'Margin improvement'} of {abs((total_blended_gp/total_blended_nmv)-(total_current_gp/total_current_nmv))*100:.2f} pp

With additional {SHIFT_PCT}% conversion (What-If):
• Additional NMV opportunity: {(total_whatif_nmv - total_blended_nmv):+,.0f} EGP
• Additional GP opportunity:  {(total_whatif_gp - total_blended_gp):+,.0f} EGP
""")

# Save simulation results
simulation_file = 'QD_simulation_results.xlsx'
with pd.ExcelWriter(simulation_file, engine='openpyxl') as writer:
    scenarios_df.to_excel(writer, sheet_name='Scenarios', index=False)
    simulation_data.to_excel(writer, sheet_name='Historical_Simulation', index=False)
    warehouse_summary.to_excel(writer, sheet_name='Warehouse_Summary', index=False)

print(f"✓ Simulation results saved to '{simulation_file}'")



HISTORICAL SIMULATION - NMV, QUANTITY & GROSS PROFIT IMPACT

Based on 486,667 historical orders across 2040 SKUs
Conversion: Base 87.4% | T1 8.2% | T2 4.4% | WS 0.0%

METRIC                               CURRENT            BLENDED             CHANGE     CHANGE %
-----------------------------------------------------------------------------------------------
Total NMV (EGP)                 133,914,917       190,088,288       +56,173,370      +41.95%
Total Quantity (units)              765,869         1,075,834          +309,965      +40.47%
Total COGS (EGP)                124,857,687       178,122,661       +53,264,974      +42.66%
Total Gross Profit (EGP)          9,057,230        11,965,627        +2,908,397      +32.11%
Gross Margin (%)                       6.76%              6.29%             -0.47 pp

WAREHOUSE-LEVEL BREAKDOWN

   WH  Orders  Base%   T1%   T2%   WS% |        NMV Δ   NMV Δ% |         GP Δ    GP Δ% |  Margin
----------------------------------------------------------

## 7. Final Ranking & Export


In [48]:
final_data['check_qty'] = final_data['tier_2_qty']/final_data['tier_1_qty']
final_data['target_qty_ratio'] = final_data['discount_ratio']/2
final_data['target_tier_2_q'] = np.round(final_data['target_qty_ratio']*final_data['tier_1_qty'])
final_data.loc[(final_data['check_qty']<1.3)&(final_data['elasticity_ratio']>3),'tier_2_qty'] = final_data['target_tier_2_q']

In [49]:
# =============================================================================
# CREATE UPLOAD FORMAT
# =============================================================================
# Format: ONE row per warehouse_id
# - Discounts Group 1: List of [tier 1 items + wholesale items] (max 200, overflow goes to Group 2)
# - Discounts Group 2: List of [tier 2 items + overflow from Group 1]
# Each item format: [product_id, packing_unit_id, quantity, discount_pct]

MAX_GROUP_SIZE = 200
MAX_DISCOUNT_CAP_t1 = 4.0
MAX_DISCOUNT_CAP_t2 = 5.0
MAX_DISCOUNT_CAP_ws = 6.0# Maximum discount capped at 6%

final_quantity_discount = pd.DataFrame(columns=['warehouse_id', 'Discounts Group 1', 'Discounts Group 2', 'Description'])

for wh_id in final_data.warehouse_id.unique():
    warehouse_data = final_data[final_data['warehouse_id'] == wh_id]
    warehouse_id = int(wh_id)
    
    # Collect all tier 1 items
    tier_1_items = []
    # Collect all tier 2 items
    tier_2_items = []
    # Collect all wholesale items
    ws_items = []
    
    for i, r in warehouse_data.iterrows():
        product_id = int(r['product_id'])
        packing_unit_id = int(r['packing_unit_id'])
        current_price = r['packing_unit_price']
        
        # Tier 1 (cap discount at MAX_DISCOUNT_CAP)
        q_1 = int(r['tier_1_qty'])
        d_1 = min(round(r['discount_1_pct'], 2), MAX_DISCOUNT_CAP_t1)
        tier_1_items.append([product_id, packing_unit_id, q_1, d_1])
        
        # Tier 2 (cap discount at MAX_DISCOUNT_CAP)
        q_2 = int(r['tier_2_qty'])
        d_2 = min(round(r['discount_2_pct'], 2), MAX_DISCOUNT_CAP_t2)
        tier_2_items.append([product_id, packing_unit_id, q_2, d_2])
        
        # Wholesale (new logic) - cap discount at MAX_DISCOUNT_CAP
        ws_qty = r.get('ws_new_qty', None)
        ws_price = r.get('ws_new_price', None)
        
        if pd.notna(ws_qty) and pd.notna(ws_price) and ws_qty > 0 and current_price > 0:
            q_ws = int(ws_qty)
            d_ws = min(round(((current_price - ws_price) / current_price) * 100, 2), MAX_DISCOUNT_CAP_ws)
            ws_items.append([product_id, packing_unit_id, q_ws, d_ws])
    
    # Group 1: Tier 1 + Wholesale (max 200)
    group_1_items = tier_1_items + ws_items
    
    # Group 2: Tier 2 + overflow from Group 1
    if len(group_1_items) > MAX_GROUP_SIZE:
        # Overflow goes to Group 2
        overflow = group_1_items[MAX_GROUP_SIZE:]
        group_1_items = group_1_items[:MAX_GROUP_SIZE]
        group_2_items = tier_2_items + overflow
    else:
        group_2_items = tier_2_items  
    
    new_row = {
        'warehouse_id': warehouse_id,
        'Discounts Group 1': group_1_items,
        'Discounts Group 2': group_2_items,
        'Description': f'{warehouse_id}QD'
    }
    final_quantity_discount = pd.concat([final_quantity_discount, pd.DataFrame([new_row])], ignore_index=True)

# Summary
print(f"Upload format created: {len(final_quantity_discount)} warehouse rows")
print(f"\nPer warehouse breakdown:")
for idx, row in final_quantity_discount.iterrows():
    wh = row['warehouse_id']
    g1_count = len(row['Discounts Group 1'])
    g2_count = len(row['Discounts Group 2'])
    print(f"  WH {wh}: Group 1 = {g1_count} items, Group 2 = {g2_count} items")

# # =============================================================================
# # SAVE FILES
# # =============================================================================

# # Save detailed data
detailed_file = 'QD_detailed.xlsx'
final_data.to_excel(detailed_file, index=False)
print(f"\n=== DETAILED FILE ===")
print(f"Saved {len(final_data)} SKUs to '{detailed_file}'")

# Save upload format
upload_file = 'QD_Data.xlsx'
final_quantity_discount.to_excel(upload_file, index=False)
print(f"\n=== UPLOAD FILE ===")
print(f"Saved {len(final_quantity_discount)} rows to '{upload_file}'")
print(f"Columns: {list(final_quantity_discount.columns)}")

Upload format created: 12 warehouse rows

Per warehouse breakdown:
  WH 1: Group 1 = 200 items, Group 2 = 198 items
  WH 8: Group 1 = 200 items, Group 2 = 198 items
  WH 170: Group 1 = 200 items, Group 2 = 199 items
  WH 236: Group 1 = 200 items, Group 2 = 200 items
  WH 337: Group 1 = 200 items, Group 2 = 200 items
  WH 339: Group 1 = 200 items, Group 2 = 199 items
  WH 401: Group 1 = 200 items, Group 2 = 198 items
  WH 501: Group 1 = 200 items, Group 2 = 198 items
  WH 632: Group 1 = 200 items, Group 2 = 198 items
  WH 703: Group 1 = 200 items, Group 2 = 200 items
  WH 797: Group 1 = 200 items, Group 2 = 200 items
  WH 962: Group 1 = 200 items, Group 2 = 200 items

=== DETAILED FILE ===
Saved 2040 SKUs to 'QD_detailed.xlsx'

=== UPLOAD FILE ===
Saved 12 rows to 'QD_Data.xlsx'
Columns: ['warehouse_id', 'Discounts Group 1', 'Discounts Group 2', 'Description']


In [50]:
# Warehouse to Tag ID mapping for upload
df_warehouse_mapping = pd.DataFrame({
    'warehouse_name': ['Assiut FC', 'Bani sweif', 'Barageel', 'El-Mahala', 'Khorshed Alex', 
                       'Mansoura FC', 'Menya Samalot', 'Mostorod', 'Sakkarah', 'Sharqya', 
                       'Sohag', 'Tanta'],
    'warehouse_id':   [501, 401, 236, 337, 797, 339, 703, 1, 962, 170, 632, 8],
    'tag_id':         [3301, 3302, 3303, 3304, 3305, 3306, 3307, 3308, 3309, 3310, 3311, 3312]
})


In [54]:
# Merge upload data with warehouse mapping
to_upload = final_quantity_discount.merge(df_warehouse_mapping, on='warehouse_id')

In [55]:
# =============================================================================
# PREPARE FINAL UPLOAD FILE
# =============================================================================

# Set description and date/time fields
to_upload['Description'] = (
    to_upload['warehouse_name'].astype(str)
    .str.replace(' ', '')
    + "QD"
)
to_upload['Description'] = to_upload['Description'].str.replace("-","") 

local_tz = pytz.timezone('Africa/Cairo')

start_date = datetime.now(local_tz) + timedelta(minutes=10)
start_date_str = start_date.strftime('%d/%m/%Y %H:%M')

end_date = datetime.now(local_tz) + timedelta(days=1)
end_date = end_date.replace(hour=12, minute=59, second=0, microsecond=0)
end_date_str = end_date.strftime('%d/%m/%Y %H:%M')
print(start_date_str,end_date_str)
to_upload['Start Date/Time'] = start_date_str
to_upload['End Date/Time'] = end_date_str
to_upload = to_upload.rename(columns={'tag_id': 'Tag ID'})

to_upload=to_upload[['Tag ID', 'Description', 'Start Date/Time', 'End Date/Time','Discounts Group 1', 'Discounts Group 2']]
# Save upload file
to_upload.to_excel('QD_upload.xlsx', index=False)
print(f"✓ Saved upload file: QD_upload.xlsx ({len(to_upload)} warehouses)")

25/01/2026 15:12 26/01/2026 12:59
✓ Saved upload file: QD_upload.xlsx (12 warehouses)


In [56]:
# =============================================================================
# UPLOAD TO API
# =============================================================================

print("Uploading QD file to API...")
response = post_QD('QD_upload.xlsx')

if response.ok:
    print(f"✓ Upload succeeded (status: {response.status_code})")
else:
    print(f"❌ Upload failed (status: {response.status_code})")
    print(response.content)

Uploading QD file to API...
✓ Upload succeeded (status: 200)


In [57]:
# =============================================================================
# PREPARE CART RULES UPDATE
# =============================================================================

# Merge current cart rules with new tier data
cart_rules_update = live_cart_rules.merge(
    final_data[['warehouse_id', 'product_id', 'packing_unit_id', 'tier_2_qty', 'ws_new_qty']],
    on=['warehouse_id', 'product_id', 'packing_unit_id']
)
cart_rules_update = cart_rules_update.fillna(0)

# New cart rule = max of tier_2_qty and ws_new_qty
cart_rules_update['tier_2'] = np.maximum(cart_rules_update['tier_2_qty'], cart_rules_update['ws_new_qty'])

# Only update rules that need to increase
cart_rules_update = cart_rules_update[cart_rules_update['tier_2'] > cart_rules_update['current_cart_rule']]
cart_rules_update = cart_rules_update[['cohort_id', 'product_id', 'packing_unit_id', 'tier_2']]
cart_rules_update=cart_rules_update.groupby(['cohort_id', 'product_id', 'packing_unit_id'])['tier_2'].max().reset_index()
print(f"✓ Cart rules to update: {len(cart_rules_update)} products across {cart_rules_update['cohort_id'].nunique()} cohorts")

✓ Cart rules to update: 245 products across 9 cohorts


In [58]:
cart_rules_update=cart_rules_update.drop_duplicates()
cart_rules_update

Unnamed: 0,cohort_id,product_id,packing_unit_id,tier_2
0,700,9,1,37.0
1,700,38,1,9.0
2,700,336,15,113.0
3,700,414,1,93.0
4,700,2878,1,7.0
...,...,...,...,...
240,1126,18964,1,80.0
241,1126,21712,3,1624.0
242,1126,21713,3,1312.0
243,1126,22125,2,73.0


In [59]:
# =============================================================================
# UPLOAD CART RULES BY COHORT
# =============================================================================

print("Uploading cart rules by cohort...")

for cohort in cart_rules_update.cohort_id.unique():
    req_data = cart_rules_update[cart_rules_update['cohort_id'] == cohort]
    
    if len(req_data) > 0:
        # Prepare data for upload
        req_data = req_data[['product_id', 'packing_unit_id', 'tier_2']]
        req_data.columns = ['Product ID', 'Packing Unit ID', 'Cart Rules']
        
        # Save and upload
        filename = f'CartRules_{cohort}.xlsx'
        req_data.to_excel(filename, index=False, engine='xlsxwriter')
        
        time.sleep(5)
        response = post_cart_rules(cohort, filename)
        
        if response.ok:
            print(f"  ✓ Cohort {cohort}: {len(req_data)} rules uploaded")
        else:
            print(f"  ❌ Cohort {cohort}: Upload failed")
            print(response.content)
            break

print("\n✓ Cart rules upload complete!")

Uploading cart rules by cohort...
  ✓ Cohort 700: 13 rules uploaded
  ✓ Cohort 701: 35 rules uploaded
  ✓ Cohort 702: 26 rules uploaded
  ✓ Cohort 703: 17 rules uploaded
  ✓ Cohort 704: 44 rules uploaded
  ✓ Cohort 1123: 26 rules uploaded
  ✓ Cohort 1124: 27 rules uploaded
  ✓ Cohort 1125: 27 rules uploaded
  ✓ Cohort 1126: 30 rules uploaded

✓ Cart rules upload complete!
