# Quantity Discount (QD) Pricing System

This notebook calculates tiered pricing and quantities for products across warehouses.

## Workflow:
1. **Setup** - Imports, connections, and configuration
2. **Product Selection** - Select top products per warehouse based on performance
3. **Quantity Tiers** - Calculate tier 1 and tier 2 quantities based on order history
4. **Market Prices** - Gather competitive pricing data
5. **Price Tiers** - Calculate discounted prices for each tier
6. **Wholesale Pricing** - Calculate wholesale prices for bulk orders
7. **Export** - Save results to Excel


## 1. Setup & Imports


In [None]:
# =============================================================================
# STANDARD LIBRARY IMPORTS
# =============================================================================
import os
import json
import time
import base64
import calendar
import warnings
from pathlib import Path
from datetime import datetime, date, timedelta

# =============================================================================
# THIRD-PARTY IMPORTS
# =============================================================================
import numpy as np
import pandas as pd
import pytz
import requests
import gspread
import boto3
import snowflake.connector
from tqdm import tqdm
from requests import get
from botocore.exceptions import ClientError
from oauth2client.service_account import ServiceAccountCredentials

# =============================================================================
# LOCAL IMPORTS & ENVIRONMENT SETUP
# =============================================================================
import setup_environment_2
import importlib
import import_ipynb

warnings.filterwarnings("ignore")
importlib.reload(setup_environment_2)
setup_environment_2.initialize_env()

  warn_incompatible_dep(


/home/ec2-user/.Renviron
/home/ec2-user/service_account_key.json


### Configuration Constants


In [None]:
# =============================================================================
# CONFIGURATION - Modify these parameters as needed
# =============================================================================

# -----------------------------------------------------------------------------
# Cohort IDs for QD program
# -----------------------------------------------------------------------------
COHORT_IDS = [700, 701, 702, 703, 704, 1123, 1124, 1125, 1126]

# -----------------------------------------------------------------------------
# Warehouse mappings: (region, warehouse_name, warehouse_id, cohort_id)
# -----------------------------------------------------------------------------
WAREHOUSE_MAPPING = [
    ('Cairo',       'El-Marg',       38,  700),
    ('Cairo',       'Mostorod',      1,   700),
    ('Giza',        'Barageel',      236, 701),
    ('Giza',        'Sakkarah',      962, 701),
    ('Delta West',  'El-Mahala',     337, 703),
    ('Delta West',  'Tanta',         8,   703),
    ('Delta East',  'Mansoura FC',   339, 704),
    ('Delta East',  'Sharqya',       170, 704),
    ('Upper Egypt', 'Assiut FC',     501, 1124),
    ('Upper Egypt', 'Bani sweif',    401, 1126),
    ('Upper Egypt', 'Menya Samalot', 703, 1123),
    ('Upper Egypt', 'Sohag',         632, 1125),
    ('Alexandria',  'Khorshed Alex', 797, 702),
]

# Excluded warehouse IDs
EXCLUDED_WAREHOUSES = [6, 9, 10]

# Products to exclude from selection
PRODUCTS_TO_REMOVE = [7630]

# -----------------------------------------------------------------------------
# Pricing Parameters
# -----------------------------------------------------------------------------
MAX_DISCOUNT_PCT = 5.0    # Maximum discount allowed from current price (%)
MIN_DISCOUNT_PCT = 0.35   # Minimum discount required from current price (%)
MIN_RATIO        = 1.1    # Minimum discount-to-quantity ratio
MAX_RATIO        = 3      # Maximum discount-to-quantity ratio

# -----------------------------------------------------------------------------
# Product Selection Thresholds
# -----------------------------------------------------------------------------
MIN_ORDERS    = 20    # Minimum orders in 4 months
MIN_RETAILERS = 5     # Minimum unique retailers
MIN_NMV       = 5000  # Minimum revenue (EGP)
MIN_VELOCITY  = 0.5   # Minimum units per day

# -----------------------------------------------------------------------------
# Ranking Parameters
# -----------------------------------------------------------------------------
TOP_PRODUCTS_PER_WAREHOUSE   = 200  # Initial selection
FINAL_PRODUCTS_PER_WAREHOUSE = 133  # Final output

# -----------------------------------------------------------------------------
# Delivery Fees
# -----------------------------------------------------------------------------
DELIVERY_FEE_CAIRO_GIZA = 25
DELIVERY_FEE_OTHER      = 20

print("✓ Configuration loaded successfully!")


Configuration loaded successfully!


### Functions

In [None]:
def get_secret(secret_name):
    """
    Retrieve secret from AWS Secrets Manager.
    
    Args:
        secret_name: Name/ID of the secret to retrieve
        
    Returns:
        Secret string or decoded binary
    """
    region_name = "us-east-1"
    
    # Create a Secrets Manager client
    session = boto3.session.Session()
    client = session.client(
        service_name='secretsmanager',
        region_name=region_name
    )
    
    try:
        get_secret_value_response = client.get_secret_value(SecretId=secret_name)
    except ClientError as e:
        # Re-raise all AWS Secrets Manager exceptions
        raise e
    
    # Return decrypted secret (string or binary)
    if 'SecretString' in get_secret_value_response:
        return get_secret_value_response['SecretString']
    else:
        return base64.b64decode(get_secret_value_response['SecretBinary'])

In [None]:
# Load API credentials from AWS Secrets Manager
pricing_api_secret = json.loads(get_secret("prod/pricing/api/"))
username = pricing_api_secret["egypt_username"]
password = pricing_api_secret["egypt_password"]
secret   = pricing_api_secret["egypt_secret"]

print("✓ API credentials loaded")

In [None]:
def get_access_token(url, client_id, client_secret):
    """
    Get OAuth access token for MaxAB APIs.
    
    Args:
        url: Token endpoint URL
        client_id: OAuth client ID
        client_secret: OAuth client secret
        
    Returns:
        Access token string
    """
    response = requests.post(
        url,
        data={
            "grant_type": "password",
            "username": username,
            "password": password
        },
        auth=(client_id, client_secret),
    )
    return response.json()["access_token"]

In [None]:
def post_QD(file_name):
    """
    Upload Quantity Discount file to MaxAB API.
    
    Args:
        file_name: Path to the Excel file to upload
        
    Returns:
        API response object
    """
    token = get_access_token(
        'https://sso.maxab.info/auth/realms/maxab/protocol/openid-connect/token',
        'main-system-externals',
        secret
    )
    
    url = "https://api.maxab.info/commerce/api/admins/v1/quantity-discounts"
    
    files = [
        ('file', (file_name, open(file_name, 'rb'), 
                  'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'))
    ]
    headers = {'Authorization': f'bearer {token}'}
    
    response = requests.request("POST", url, headers=headers, data={}, files=files)
    return response

In [None]:
def post_cart_rules(cohort_id, file_name):
    """
    Upload Cart Rules file for a specific cohort.
    
    Args:
        cohort_id: ID of the cohort to update
        file_name: Path to the Excel file to upload
        
    Returns:
        API response object
    """
    token = get_access_token(
        'https://sso.maxab.info/auth/realms/maxab/protocol/openid-connect/token',
        'main-system-externals',
        secret
    )
    
    url = f"https://api.maxab.info/main-system/api/admin-portal/cohorts/{cohort_id}/cart-rules"
    
    files = [
        ('sheet', (file_name, open(file_name, 'rb'),
                   'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'))
    ]
    headers = {'Authorization': f'bearer {token}'}
    
    response = requests.request("POST", url, headers=headers, data={}, files=files)
    return response

### Database Connection Function


In [None]:
def snowflake_query(country, query, warehouse=None, columns=[], conn=None):
    """
    Execute a query against Snowflake and return results as DataFrame.
    
    Args:
        country: Country identifier (e.g., "Egypt")
        query: SQL query string to execute
        warehouse: Snowflake warehouse (optional)
        columns: Custom column names (optional)
        conn: Existing connection (optional)
        
    Returns:
        pandas DataFrame with query results
    """
    con = snowflake.connector.connect(
        user     = os.environ["SNOWFLAKE_USERNAME"],
        account  = os.environ["SNOWFLAKE_ACCOUNT"],
        password = os.environ["SNOWFLAKE_PASSWORD"],
        database = os.environ["SNOWFLAKE_DATABASE"]
    )

    try:
        cur = con.cursor()
        cur.execute("USE WAREHOUSE COMPUTE_WH")
        cur.execute(query)
        
        column_names = [col[0] for col in cur.description]
        results = cur.fetchall()
        
        if not results:
            out = pd.DataFrame(columns=[name.lower() for name in column_names])
        else:
            if len(columns) == 0:
                out = pd.DataFrame(np.array(results), columns=column_names)
                out.columns = out.columns.str.lower()
            else:
                out = pd.DataFrame(np.array(results), columns=columns)
                out.columns = out.columns.str.lower()
        
        return out
        
    except Exception as e:
        print(f"❌ Query error: {e}")
        raise
        
    finally:
        cur.close()
        con.close()

In [None]:
# Get Snowflake timezone for consistent date/time handling
query = "SHOW PARAMETERS LIKE 'TIMEZONE'"
timezone_result = snowflake_query("Egypt", query)
zone_to_use = timezone_result['value'].values[0]
print(f"✓ Using timezone: {zone_to_use}")

### Google Sheets Connection (Force Brands)


## 2. Product Selection

Select top-performing products per warehouse based on:
- Gross profit ranking (40% weight)
- Sales velocity ranking (25% weight)
- Order count ranking (20% weight)
- Retailer count ranking (15% weight)


In [None]:
query = ''' 
WITH rr AS (
    SELECT product_id, warehouse_id, rr
    FROM (
        SELECT *, 
               MAX(date) OVER (PARTITION BY product_id, warehouse_id) as max_date
        FROM finance.PREDICTED_RUNNING_RATES
        QUALIFY date = max_date
            AND date::date >= CURRENT_DATE - 14 
    )
),

stocks AS (
    SELECT 
        warehouse_id,
        product_id,
        SUM(stocks) as stocks,
        CASE 
            WHEN SUM(rr) > 0 THEN SUM(stocks) / SUM(rr) 
            ELSE SUM(stocks) 
        END as doh
    FROM (
        SELECT DISTINCT 
            product_warehouse.warehouse_id,
            product_warehouse.product_id,
            (product_warehouse.available_stock)::integer as stocks,
            COALESCE(rr.rr, 0) as rr 
        FROM product_warehouse
        JOIN products ON product_warehouse.product_id = products.id
        JOIN product_units ON products.unit_id = product_units.id
        LEFT JOIN rr ON rr.product_id = products.id 
            AND rr.warehouse_id = product_warehouse.warehouse_id
        WHERE product_warehouse.warehouse_id NOT IN (6, 9, 10)
            AND product_warehouse.is_basic_unit = 1
            AND product_warehouse.available_stock > 0 
    )
    GROUP BY warehouse_id, product_id
    HAVING doh >= 1
),

base AS (
    SELECT *, ROW_NUMBER() OVER (PARTITION BY retailer_id ORDER BY priority) as rnk 
    FROM (
        SELECT x.*, TAGGABLE_ID as retailer_id 
        FROM (
            SELECT id as cohort_id, name as cohort_name, priority, dynamic_tag_id 
            FROM cohorts 
            WHERE is_active = 'true'
                AND id IN (700,701,702,703,704,1123,1124,1125,1126)
        ) x 
        JOIN DYNAMIC_TAGgables dt ON x.dynamic_tag_id = dt.dynamic_tag_id
        WHERE dt.taggable_id not IN (
            SELECT taggable_id FROM DYNAMIC_TAGgables 
            WHERE dynamic_tag_id IN (2807, 2808, 2809, 2810, 2811, 2812)
        )
    )
    QUALIFY rnk = 1 
    ORDER BY cohort_id
),

-- Count total retailers per warehouse for penetration calculation
warehouse_retailer_counts AS (
    SELECT 
        whs.warehouse_id,
        COUNT(DISTINCT base.retailer_id) as total_warehouse_retailers
    FROM base
    CROSS JOIN (SELECT DISTINCT warehouse_id FROM (VALUES
            (38), (1), (236), (962), (337), (8), (339), (170), 
            (501), (401), (703), (632), (797)
        ) x(warehouse_id)
    ) whs
    GROUP BY whs.warehouse_id
),

-- Map cohorts to warehouses
cohort_warehouse_map AS (
    SELECT cohort_id, warehouse_id
    FROM (VALUES
        (700, 38),   -- Cairo -> El-Marg
        (700, 1),    -- Cairo -> Mostorod
        (701, 236),  -- Giza -> Barageel
        (701, 962),  -- Giza -> Sakkarah
        (703, 337),  -- Delta West -> El-Mahala
        (703, 8),    -- Delta West -> Tanta
        (704, 339),  -- Delta East -> Mansoura FC
        (704, 170),  -- Delta East -> Sharqya
        (1124, 501), -- Upper Egypt -> Assiut FC
        (1126, 401), -- Upper Egypt -> Bani sweif
        (1123, 703), -- Upper Egypt -> Menya Samalot
        (1125, 632), -- Upper Egypt -> Sohag
        (702, 797)   -- Alexandria -> Khorshed Alex
    ) x(cohort_id, warehouse_id)
),

-- Get pricing information by cohort (which maps to warehouse)
cohort_prices AS (
    SELECT  
        cpu.cohort_id,
        pu.product_id,
        pu.packing_unit_id,
        pu.basic_unit_count,
        AVG(cpu.price) as price
    FROM cohort_product_packing_units cpu
    JOIN PACKING_UNIT_PRODUCTS pu ON pu.id = cpu.product_packing_unit_id
    WHERE cpu.cohort_id IN (700,701,702,703,704,1123,1124,1125,1126)
        AND cpu.created_at::date <> '2023-07-31'
        AND cpu.is_customized = true
    GROUP BY 
        cpu.cohort_id,
        pu.product_id,
        pu.packing_unit_id,
        pu.basic_unit_count
),

-- Get live prices by cohort
live_cohort_prices AS (
    SELECT 
        cohort_id,
        product_id,
        pu_id as packing_unit_id,
        buc as basic_unit_count,
        NEW_PRICE as price
    FROM materialized_views.DBDP_PRICES
    WHERE created_at = CURRENT_DATE
        AND DATE_PART('hour', CURRENT_TIME) BETWEEN SPLIT_PART(time_slot, '-', 1)::int AND SPLIT_PART(time_slot, '-', 2)::int
        AND cohort_id IN (700,701,702,703,704,1123,1124,1125,1126)
),

-- Combine live and historical prices (live takes priority)
combined_cohort_prices AS (
    SELECT *
    FROM (
        SELECT *, 1 AS priority FROM live_cohort_prices
        UNION ALL
        SELECT *, 2 AS priority FROM cohort_prices
    )
    QUALIFY ROW_NUMBER() OVER (PARTITION BY cohort_id, product_id, packing_unit_id ORDER BY priority) = 1
),

-- Map cohort prices to warehouse prices
warehouse_prices AS (
    SELECT 
        cwm.warehouse_id,
        ccp.product_id,
        ccp.packing_unit_id,
        ccp.basic_unit_count,
        ccp.price
    FROM combined_cohort_prices ccp
    JOIN cohort_warehouse_map cwm ON cwm.cohort_id = ccp.cohort_id
    WHERE ccp.price IS NOT NULL
),

-- Get sales performance over last 4 months
product_performance AS (
    SELECT 
        w.name as warehouse,
        w.id as warehouse_id,
        pso.product_id,
        pso.packing_unit_id,
        CONCAT(products.name_ar, ' ', products.size, ' ', product_units.name_ar) as sku,
        brands.name_ar as brand,
        categories.name_ar as category,
        
        -- Core volume metrics
        COUNT(DISTINCT so.parent_sales_order_id) as total_orders,
        COUNT(DISTINCT so.retailer_id) as total_retailers,
        SUM(pso.purchased_item_count) as total_packing_units_sold,
        SUM(pso.purchased_item_count * pso.basic_unit_count) as total_basic_units_sold,
        
        -- Revenue and margin
        SUM(pso.total_price) as total_nmv,
        SUM(COALESCE(f.wac_p, 0) * pso.purchased_item_count * pso.basic_unit_count) as total_cogs,
        (SUM(pso.total_price) - SUM(COALESCE(f.wac_p, 0) * pso.purchased_item_count * pso.basic_unit_count)) / 
            NULLIF(SUM(pso.total_price), 0) as blended_margin,
        
        -- Average order metrics
        AVG(pso.purchased_item_count) as avg_packing_units_per_order,
        
        -- Velocity metrics (units per day)
        SUM(pso.purchased_item_count) / 120.0 as packing_units_per_day
        
    FROM product_sales_order pso
    JOIN sales_orders so ON so.id = pso.sales_order_id
    JOIN base ON base.retailer_id = so.retailer_id
    JOIN products ON products.id = pso.product_id
    JOIN brands ON products.brand_id = brands.id
    JOIN categories ON products.category_id = categories.id 
        AND categories.name_ar NOT LIKE '%سايب%'
    JOIN finance.all_cogs f ON f.product_id = pso.product_id
        AND f.from_date::date <= so.created_at::date
        AND f.to_date::date > so.created_at::date
    JOIN product_units ON product_units.id = products.unit_id
	join warehouses w on w.id = pso.warehouse_id
    
    WHERE TRUE
        AND so.created_at::date BETWEEN current_date - 60 AND CURRENT_DATE - 1
        AND so.sales_order_status_id NOT IN (7, 12)
        AND so.channel IN ('telesales', 'retailer')
        AND pso.purchased_item_count <> 0
        AND products.activation = 'true'
        AND w.id NOT IN (6, 9, 10)
    
    GROUP BY All
),

-- Add retailer penetration
product_performance_with_penetration AS (
    SELECT 
        pp.*,
        wrc.total_warehouse_retailers,
        (pp.total_retailers * 100.0 / NULLIF(wrc.total_warehouse_retailers, 0)) as retailer_penetration_pct
    FROM product_performance pp
    LEFT JOIN warehouse_retailer_counts wrc ON wrc.warehouse_id = pp.warehouse_id
),

-- Add pricing information at warehouse level
product_performance_with_price AS (
    SELECT 
        pp.*,
        COALESCE(wp.price, 0) as product_price,
        COALESCE(wp.basic_unit_count, 1) as basic_unit_count
    FROM product_performance_with_penetration pp
    LEFT JOIN warehouse_prices wp ON wp.warehouse_id = pp.warehouse_id
        AND wp.product_id = pp.product_id 
        AND wp.packing_unit_id = pp.packing_unit_id
),

-- Add quality filters to focus on high-potential products
qualified_products AS (
    SELECT 
        pp.warehouse,
        pp.warehouse_id,
        pp.product_id,
        pp.packing_unit_id,
        pp.sku,
        pp.brand,
        pp.category,
        pp.total_orders,
        pp.total_retailers,
        pp.total_packing_units_sold,
        pp.total_basic_units_sold,
        pp.total_nmv,
        pp.blended_margin,
        pp.avg_packing_units_per_order,
        pp.packing_units_per_day,
        pp.retailer_penetration_pct,
        pp.product_price,
        pp.basic_unit_count,
        s.doh,
        s.stocks,
        
        -- Calculate a simple volume-based score
        (pp.total_nmv * pp.blended_margin) as gross_profit,
        
        -- Rank by gross profit within warehouse
        ROW_NUMBER() OVER (PARTITION BY pp.warehouse_id ORDER BY (pp.total_nmv * pp.blended_margin) DESC) as gp_rank,
        
        -- Rank by velocity
        ROW_NUMBER() OVER (PARTITION BY pp.warehouse_id ORDER BY pp.packing_units_per_day DESC) as velocity_rank,
        
        -- Rank by orders
        ROW_NUMBER() OVER (PARTITION BY pp.warehouse_id ORDER BY pp.total_orders DESC) as order_rank,
        
        -- Rank by number of retailers
        ROW_NUMBER() OVER (PARTITION BY pp.warehouse_id ORDER BY pp.total_retailers DESC) as retailer_rank
        
    FROM product_performance_with_price pp
    JOIN stocks s ON s.product_id = pp.product_id 
        AND s.warehouse_id = pp.warehouse_id

),

-- Select top products using a combined scoring approach
top_products AS (
    SELECT 
        warehouse,
        warehouse_id,
        product_id,
        packing_unit_id,
        sku,
        brand,
        category,
        total_orders,
        total_retailers,
        total_packing_units_sold,
        total_basic_units_sold,
        ROUND(total_nmv, 2) as total_nmv,
        ROUND(blended_margin * 100, 2) as margin_pct,
        ROUND(avg_packing_units_per_order, 2) as avg_order_qty,
        ROUND(packing_units_per_day, 2) as units_per_day,
        ROUND(retailer_penetration_pct, 1) as retailer_penetration_pct,
        ROUND(gross_profit, 2) as gross_profit,
        ROUND(product_price, 2) as packing_unit_price,
        basic_unit_count,
        ROUND(product_price / NULLIF(basic_unit_count, 0), 2) as price_per_basic_unit,
        gp_rank,
        velocity_rank,
        order_rank,
        retailer_rank,
        ROUND(doh, 2) as days_on_hand,
        stocks as available_stock,
        
        -- Combined score: weighted average of ranks (lower is better)
        (gp_rank * 0.15 + velocity_rank * 0.20 + order_rank * 0.30 + retailer_rank * 0.35) as combined_rank_score
        
    FROM qualified_products
)

SELECT 
    warehouse,
    warehouse_id,
    product_id,
    packing_unit_id,
    sku,
    brand,
    category as cat,
    total_orders,
    total_retailers,
    total_packing_units_sold,
    total_basic_units_sold,
    total_nmv,
    margin_pct,
    avg_order_qty,
    units_per_day,
    retailer_penetration_pct,
    gross_profit,
    packing_unit_price,
    basic_unit_count,
    price_per_basic_unit,
    days_on_hand,
    available_stock,
    gp_rank as gross_profit_rank,
    velocity_rank,
    order_rank,
    retailer_rank,
    ROUND(combined_rank_score, 2) as combined_score,
    ROW_NUMBER() OVER (PARTITION BY warehouse ORDER BY combined_rank_score) as final_rank
FROM top_products
WHERE combined_rank_score <= 500  -- Adjust this to get more/fewer products
qualify final_rank<=200
ORDER BY warehouse, combined_rank_score;
'''

# Execute query and convert numeric columns
print("Fetching product selection data...")
selected_products = snowflake_query("Egypt", query)

for col in selected_products.columns:
    selected_products[col] = pd.to_numeric(selected_products[col], errors='ignore')

print(f"✓ Retrieved {len(selected_products)} products from {selected_products['warehouse_id'].nunique()} warehouses")

In [None]:
# Remove excluded products
selected_products = selected_products[~selected_products['product_id'].isin(PRODUCTS_TO_REMOVE)]
print(f"✓ Selected {len(selected_products)} products after exclusions")

## 3. Quantity Tier Calculation

Calculate tier 1 and tier 2 quantities based on:
- Order history from frequent buyers (2+ orders)
- Statistical analysis (median, Q3, P85, P90, P95)
- IQR outlier removal


In [None]:
selected_df = selected_products[['warehouse_id', 'product_id', 'packing_unit_id']].values.tolist()
tuples_string = ','.join([f"({int(wh_id)}, {int(prod_id)}, {int(pu_id)})" for wh_id, prod_id, pu_id in selected_df])
query = f'''
WITH selected_products AS (
    SELECT warehouse_id, product_id, packing_unit_id
    FROM (VALUES
      {tuples_string}
    ) AS x(warehouse_id, product_id, packing_unit_id)
),

-- Same base filtering as product selection query
-- Retailers in QD cohorts AND in specific dynamic tags
base AS (
    SELECT *, ROW_NUMBER() OVER (PARTITION BY retailer_id ORDER BY priority) as rnk 
    FROM (
        SELECT x.*, TAGGABLE_ID as retailer_id 
        FROM (
            SELECT id as cohort_id, name as cohort_name, priority, dynamic_tag_id 
            FROM cohorts 
            WHERE is_active = 'true'
                AND id IN (700,701,702,703,704,1123,1124,1125,1126)
        ) x 
        JOIN DYNAMIC_TAGgables dt ON x.dynamic_tag_id = dt.dynamic_tag_id
        WHERE dt.taggable_id not IN (
            SELECT taggable_id FROM DYNAMIC_TAGgables 
            WHERE dynamic_tag_id IN (2807, 2808, 2809, 2810, 2811, 2812)
        )
    )
    QUALIFY rnk = 1 
),

raw_order_quantities AS (
    SELECT 
        whs.wh as warehouse,
        whs.warehouse_id,
        pso.product_id,
        pso.packing_unit_id,
        CONCAT(products.name_ar, ' ', products.size, ' ', product_units.name_ar) as sku,
        brands.name_ar as brand,
        categories.name_ar as category,
        so.parent_sales_order_id,
        so.retailer_id,
        so.created_at::date as order_date,
        SUM(pso.purchased_item_count) as order_qty,
        SUM(pso.total_price) as order_value,
        -- ADD RECENCY WEIGHT: Recent orders get higher weight (exponential decay)
        EXP(-0.02 * DATEDIFF('day', so.created_at::date, CURRENT_DATE)) as recency_weight
        
    FROM product_sales_order pso
    JOIN sales_orders so ON so.id = pso.sales_order_id
    -- Filter to only include retailers from base (same cohorts + tags as product selection)
    JOIN base ON base.retailer_id = so.retailer_id
    JOIN products ON products.id = pso.product_id
    JOIN brands ON products.brand_id = brands.id
    JOIN categories ON products.category_id = categories.id
    JOIN product_units ON product_units.id = products.unit_id
    JOIN materialized_views.retailer_polygon ON materialized_views.retailer_polygon.retailer_id = so.retailer_id
    JOIN districts ON districts.id = materialized_views.retailer_polygon.district_id
    JOIN cities ON cities.id = districts.city_id
    JOIN states ON states.id = cities.state_id
    JOIN regions ON regions.id = states.region_id
    JOIN (SELECT * FROM (VALUES
            ('Cairo', 'El-Marg', 38),
            ('Cairo', 'Mostorod', 1),
            ('Giza', 'Barageel', 236),
            ('Giza', 'Sakkarah', 962),
            ('Delta West', 'El-Mahala', 337),
            ('Delta West', 'Tanta', 8),
            ('Delta East', 'Mansoura FC', 339),
            ('Delta East', 'Sharqya', 170),
            ('Upper Egypt', 'Assiut FC', 501),
            ('Upper Egypt', 'Bani sweif', 401),
            ('Upper Egypt', 'Menya Samalot', 703),
            ('Upper Egypt', 'Sohag', 632),
            ('Alexandria', 'Khorshed Alex', 797)
        ) x(region_name, wh, warehouse_id)
    ) whs ON whs.region_name = CASE WHEN regions.id = 2 THEN states.name_en ELSE regions.name_en END
    JOIN selected_products sp ON sp.warehouse_id = whs.warehouse_id 
        AND sp.product_id = pso.product_id
        AND sp.packing_unit_id = pso.packing_unit_id
    
    WHERE TRUE
        AND so.created_at::date BETWEEN DATE_TRUNC('month', CURRENT_DATE - INTERVAL '4 months') AND CURRENT_DATE - 1
        AND so.sales_order_status_id NOT IN (7, 12)
        AND so.channel IN ('telesales', 'retailer')
        AND pso.purchased_item_count <> 0
        AND products.activation = 'true'
    
    GROUP BY 
        whs.wh,
        whs.warehouse_id,
        pso.product_id,
        pso.packing_unit_id,
        products.name_ar,
        products.size,
        product_units.name_ar,
        brands.name_ar,
        categories.name_ar,
        so.parent_sales_order_id,
        so.retailer_id,
        so.created_at::date
),

retailer_frequency AS (
    SELECT 
        warehouse_id,
        product_id,
        packing_unit_id,
        retailer_id,
        COUNT(DISTINCT parent_sales_order_id) as order_count,
        COUNT(DISTINCT DATE_TRUNC('week', order_date)) as weeks_ordered,
        MIN(order_date) as first_order_date,
        MAX(order_date) as last_order_date,
        DATEDIFF('day', MIN(order_date), MAX(order_date)) as days_span,
        CASE 
            WHEN COUNT(DISTINCT parent_sales_order_id) > 1 
            THEN DATEDIFF('day', MIN(order_date), MAX(order_date)) / (COUNT(DISTINCT parent_sales_order_id) - 1)
            ELSE NULL 
        END as avg_days_between_orders
    FROM raw_order_quantities
    GROUP BY warehouse_id, product_id, packing_unit_id, retailer_id
),

frequent_buyers AS (
    SELECT 
        warehouse_id,
        product_id,
        packing_unit_id,
        retailer_id,
        order_count,
        weeks_ordered,
        avg_days_between_orders
    FROM retailer_frequency
    WHERE order_count >= 2 
       OR weeks_ordered >= 2
),

filtered_orders AS (
    SELECT roq.*
    FROM raw_order_quantities roq
    JOIN frequent_buyers fb 
        ON fb.warehouse_id = roq.warehouse_id
        AND fb.product_id = roq.product_id
        AND fb.packing_unit_id = roq.packing_unit_id
        AND fb.retailer_id = roq.retailer_id
),

initial_stats AS (
    SELECT 
        warehouse_id,
        product_id,
        packing_unit_id,
        PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY order_qty) as q1,
        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY order_qty) as q3,
        MEDIAN(order_qty) as median_qty,
        STDDEV_POP(order_qty) as stddev_qty,
        AVG(order_qty) as avg_qty
    FROM filtered_orders
    GROUP BY warehouse_id, product_id, packing_unit_id
),

cleaned_orders AS (
    SELECT fo.*
    FROM filtered_orders fo
    JOIN initial_stats ist 
        ON ist.warehouse_id = fo.warehouse_id
        AND ist.product_id = fo.product_id
        AND ist.packing_unit_id = fo.packing_unit_id
    WHERE TRUE
        AND fo.order_qty >= ist.q1 - 1.5 * (ist.q3 - ist.q1)
        AND fo.order_qty <= ist.q3 + 1.5 * (ist.q3 - ist.q1)
        AND (ist.stddev_qty = 0 
             OR ABS(fo.order_qty - ist.avg_qty) <= 3 * ist.stddev_qty)
),

-- MODIFIED: Recent orders stats (last 15 days)
recent_trends AS (
    SELECT 
        warehouse_id,
        product_id,
        packing_unit_id,
        -- Weighted average gives more importance to recent orders
        SUM(order_qty * recency_weight) / NULLIF(SUM(recency_weight), 0) as weighted_avg_qty,
        -- Last 15 days statistics
        AVG(CASE WHEN order_date >= CURRENT_DATE - 15 THEN order_qty END) as last_15d_avg,
        MEDIAN(CASE WHEN order_date >= CURRENT_DATE - 15 THEN order_qty END) as last_15d_median,
        MAX(CASE WHEN order_date >= CURRENT_DATE - 15 THEN order_qty END) as last_15d_max,
        COUNT(CASE WHEN order_date >= CURRENT_DATE - 15 THEN 1 END) as last_15d_orders
    FROM cleaned_orders
    GROUP BY warehouse_id, product_id, packing_unit_id
),

quantity_stats AS (
    SELECT 
        warehouse,
        warehouse_id,
        product_id,
        packing_unit_id,
        sku,
        brand,
        category,
        
        COUNT(DISTINCT parent_sales_order_id) as total_orders,
        COUNT(DISTINCT retailer_id) as total_retailers,
        
        MIN(order_qty) as min_qty,
        MAX(order_qty) as max_qty,
        AVG(order_qty) as avg_qty,
        MEDIAN(order_qty) as median_qty,
        STDDEV_POP(order_qty) as stddev_qty,
        
        PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY order_qty) as q1_qty,
        PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY order_qty) as q2_qty,
        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY order_qty) as q3_qty,
        PERCENTILE_CONT(0.85) WITHIN GROUP (ORDER BY order_qty) as p85_qty,
        PERCENTILE_CONT(0.90) WITHIN GROUP (ORDER BY order_qty) as p90_qty,
        PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY order_qty) as p95_qty,
        
        SUM(order_value) as total_revenue,
        AVG(order_value) as avg_order_value
        
    FROM cleaned_orders
    GROUP BY 
        warehouse,
        warehouse_id,
        product_id,
        packing_unit_id,
        sku,
        brand,
        category
),

frequency_table AS (
    SELECT
        warehouse_id,
        product_id,
        packing_unit_id,
        order_qty,
        COUNT(DISTINCT parent_sales_order_id) AS freq
    FROM cleaned_orders
    GROUP BY warehouse_id, product_id, packing_unit_id, order_qty
),

lag_lead AS (
    SELECT
        warehouse_id,
        product_id,
        packing_unit_id,
        order_qty,
        freq,
        LAG(freq) OVER (PARTITION BY warehouse_id, product_id, packing_unit_id ORDER BY order_qty) AS prev_freq,
        LEAD(freq) OVER (PARTITION BY warehouse_id, product_id, packing_unit_id ORDER BY order_qty) AS next_freq
    FROM frequency_table
),

most_frequent_qty AS (
    SELECT 
        warehouse_id,
        product_id,
        packing_unit_id,
        order_qty as mode_qty,
        freq as mode_freq,
        freq * 1.0 / SUM(freq) OVER (PARTITION BY warehouse_id, product_id, packing_unit_id) as mode_contribution
    FROM (
        SELECT *,
               ROW_NUMBER() OVER (PARTITION BY warehouse_id, product_id, packing_unit_id ORDER BY freq DESC, order_qty DESC) as rn
        FROM lag_lead
        WHERE (freq > COALESCE(prev_freq, -1))
          AND (freq > COALESCE(next_freq, -1))
    )
    WHERE rn = 1
),

frequency_metrics AS (
    SELECT 
        fb.warehouse_id,
        fb.product_id,
        fb.packing_unit_id,
        COUNT(DISTINCT fb.retailer_id) as frequent_retailer_count,
        AVG(fb.order_count) as avg_orders_per_retailer,
        AVG(fb.avg_days_between_orders) as avg_refill_days,
        MEDIAN(fb.avg_days_between_orders) as median_refill_days
    FROM frequent_buyers fb
    GROUP BY fb.warehouse_id, fb.product_id, fb.packing_unit_id
),

tier_calculations AS (
    SELECT 
        qs.*,
        COALESCE(mf.mode_qty, qs.median_qty) as mode_qty,
        COALESCE(mf.mode_freq, 0) as mode_freq,
        COALESCE(mf.mode_contribution, 0) as mode_contribution,
        COALESCE(fm.frequent_retailer_count, 0) as frequent_retailer_count,
        COALESCE(fm.avg_orders_per_retailer, 0) as avg_orders_per_retailer,
        COALESCE(fm.avg_refill_days, 0) as avg_refill_days,
        COALESCE(fm.median_refill_days, 0) as median_refill_days,
        
        -- ADD: Recency metrics
        rt.weighted_avg_qty,
        rt.last_15d_avg,
        rt.last_15d_median,
        rt.last_15d_max,
        rt.last_15d_orders,
        
        -- MODIFIED: Tier 1 with 15-day recency factor
        -- Blends historical median with recent trends (70% historical, 30% recent)
        CEIL(GREATEST(
            (0.7 * qs.median_qty + 0.3 * COALESCE(rt.weighted_avg_qty, qs.median_qty)) + 1.0 * COALESCE(qs.stddev_qty, 1),
            qs.q3_qty,
            COALESCE(mf.mode_qty, qs.median_qty) + GREATEST(3, qs.median_qty * 0.3),
            -- If recent 15 days show growth, adjust upward
            CASE 
                WHEN rt.last_15d_orders >= 3 AND rt.last_15d_median > qs.median_qty 
                THEN rt.last_15d_median * 1.2
                ELSE qs.median_qty * 1.4
            END,
            qs.median_qty + 3
        )) as tier_1_qty,
        
        -- MODIFIED: Tier 2 with 15-day recency factor
        CEIL(GREATEST(
            qs.q3_qty + 1.5 * COALESCE(qs.stddev_qty, 1),
            qs.p85_qty + 1.0 * COALESCE(qs.stddev_qty, 1),
            qs.p90_qty + 0.5 * COALESCE(qs.stddev_qty, 1),
            qs.p95_qty,
            -- Blend historical and weighted average
            (0.6 * qs.median_qty + 0.4 * COALESCE(rt.weighted_avg_qty, qs.median_qty)) * 2.0,
            -- If last 15 days show higher demand, adjust tier 2 upward
            CASE 
                WHEN rt.last_15d_orders >= 3 AND rt.last_15d_max > qs.p90_qty 
                THEN rt.last_15d_max * 1.1
                ELSE qs.median_qty * 2.0
            END
        )) as tier_2_qty_base
        
    FROM quantity_stats qs
    LEFT JOIN most_frequent_qty mf 
        ON mf.warehouse_id = qs.warehouse_id 
        AND mf.product_id = qs.product_id
        AND mf.packing_unit_id = qs.packing_unit_id
    LEFT JOIN frequency_metrics fm
        ON fm.warehouse_id = qs.warehouse_id
        AND fm.product_id = qs.product_id
        AND fm.packing_unit_id = qs.packing_unit_id
    LEFT JOIN recent_trends rt
        ON rt.warehouse_id = qs.warehouse_id
        AND rt.product_id = qs.product_id
        AND rt.packing_unit_id = qs.packing_unit_id
),

tier_adjustments AS (
    SELECT 
        warehouse,
        warehouse_id,
        product_id,
        packing_unit_id,
        sku,
        brand,
        category,
        total_orders,
        total_retailers,
        min_qty,
        avg_qty,
        median_qty,
        stddev_qty,
        q1_qty,
        q3_qty,
        p85_qty,
        p90_qty,
        p95_qty,
        max_qty,
        mode_qty,
        mode_freq,
        mode_contribution,
        frequent_retailer_count,
        avg_orders_per_retailer,
        avg_refill_days,
        median_refill_days,
        total_revenue,
        avg_order_value,
        
        -- ADD: Recency metrics to output
        weighted_avg_qty,
        last_15d_avg,
        last_15d_median,
        last_15d_max,
        last_15d_orders,
        
        tier_1_qty,
        LEAST(
            CEIL(GREATEST(
                tier_2_qty_base,
                tier_1_qty * 1.6
            )),
            GREATEST(
                tier_1_qty * 3.5,
                tier_1_qty + 20
            )
        ) as tier_2_qty
        
    FROM tier_calculations
),

retailer_distribution AS (
    SELECT 
        co.warehouse_id,
        co.product_id,
        co.packing_unit_id,
        ta.tier_1_qty,
        ta.tier_2_qty,
        COUNT(DISTINCT CASE 
            WHEN co.order_qty < ta.tier_1_qty THEN co.retailer_id 
        END) as retailers_below_t1,
        COUNT(DISTINCT CASE 
            WHEN co.order_qty >= ta.tier_1_qty AND co.order_qty < ta.tier_2_qty THEN co.retailer_id 
        END) as retailers_at_t1,
        COUNT(DISTINCT CASE 
            WHEN co.order_qty >= ta.tier_2_qty THEN co.retailer_id 
        END) as retailers_at_t2,
        COUNT(CASE 
            WHEN co.order_qty < ta.tier_1_qty THEN 1 
        END) as orders_below_t1,
        COUNT(CASE 
            WHEN co.order_qty >= ta.tier_1_qty AND co.order_qty < ta.tier_2_qty THEN 1 
        END) as orders_at_t1,
        COUNT(CASE 
            WHEN co.order_qty >= ta.tier_2_qty THEN 1 
        END) as orders_at_t2
    FROM cleaned_orders co
    JOIN tier_adjustments ta 
        ON ta.warehouse_id = co.warehouse_id 
        AND ta.product_id = co.product_id
        AND ta.packing_unit_id = co.packing_unit_id
    GROUP BY 
        co.warehouse_id,
        co.product_id,
        co.packing_unit_id,
        ta.tier_1_qty,
        ta.tier_2_qty
)

SELECT 
    ta.warehouse,
    ta.warehouse_id,
    ta.product_id,
    ta.packing_unit_id,
    ta.sku,
    ta.brand,
    ta.category,
    
    ta.frequent_retailer_count,
    ROUND(ta.avg_orders_per_retailer, 2) as avg_orders_per_retailer,
    ROUND(ta.avg_refill_days, 1) as avg_refill_days,
    ROUND(ta.median_refill_days, 1) as median_refill_days,
    
    ta.total_orders,
    ta.total_retailers,
    
    ta.min_qty,
    ROUND(ta.avg_qty, 2) as avg_qty,
    ta.median_qty,
    ROUND(ta.weighted_avg_qty, 2) as weighted_avg_qty,
    ta.q1_qty as q1_25_qty,
    ta.q3_qty as q3_75_qty,
    ta.p85_qty,
    ta.p90_qty,
    ta.p95_qty,
    ta.max_qty,
    ROUND(ta.stddev_qty, 2) as stddev_qty,
    ta.mode_qty,
    ta.mode_freq,
    ROUND(ta.mode_contribution * 100, 1) as mode_pct,
    
    -- MODIFIED: 15-day trend metrics
    ROUND(ta.last_15d_avg, 2) as last_15d_avg,
    ta.last_15d_median,
    ta.last_15d_max,
    ta.last_15d_orders,
    
    ta.tier_1_qty,
    ta.tier_2_qty,
    ROUND((ta.tier_1_qty - ta.median_qty) * 100.0 / NULLIF(ta.median_qty, 0), 1) as tier_1_increase_pct,
    ROUND((ta.tier_2_qty - ta.median_qty) * 100.0 / NULLIF(ta.median_qty, 0), 1) as tier_2_increase_pct,
    ROUND(ta.tier_2_qty * 1.0 / NULLIF(ta.tier_1_qty, 0), 2) as tier_2_to_tier_1_ratio,
    
    rd.retailers_below_t1,
    rd.retailers_at_t1,
    rd.retailers_at_t2,
    
    rd.orders_below_t1,
    rd.orders_at_t1,
    rd.orders_at_t2,
    
    ROUND(100.0 * rd.retailers_below_t1 / NULLIF(ta.total_retailers, 0), 1) as pct_retailers_below_t1,
    ROUND(100.0 * rd.retailers_at_t1 / NULLIF(ta.total_retailers, 0), 1) as pct_retailers_at_t1,
    ROUND(100.0 * rd.retailers_at_t2 / NULLIF(ta.total_retailers, 0), 1) as pct_retailers_at_t2,
    
    ROUND(100.0 * rd.orders_below_t1 / NULLIF(ta.total_orders, 0), 1) as pct_orders_below_t1,
    ROUND(100.0 * rd.orders_at_t1 / NULLIF(ta.total_orders, 0), 1) as pct_orders_at_t1,
    ROUND(100.0 * rd.orders_at_t2 / NULLIF(ta.total_orders, 0), 1) as pct_orders_at_t2,
    
    ROUND(ta.total_revenue, 2) as total_revenue,
    ROUND(ta.avg_order_value, 2) as avg_order_value

FROM tier_adjustments ta
JOIN retailer_distribution rd 
    ON rd.warehouse_id = ta.warehouse_id 
    AND rd.product_id = ta.product_id
    AND rd.packing_unit_id = ta.packing_unit_id
ORDER BY ta.warehouse, ta.total_orders DESC
'''

# Execute query and convert numeric columns
print("Fetching quantity tier data...")
tiers_selection = snowflake_query("Egypt", query)

for col in tiers_selection.columns:
    tiers_selection[col] = pd.to_numeric(tiers_selection[col], errors='ignore')

print(f"✓ Calculated tiers for {len(tiers_selection)} product-warehouse combinations")


### SKU Information & Cost Data


In [None]:
query = f'''
SELECT DISTINCT  
    products.id as product_id,
    CONCAT(products.name_ar, ' ', products.size, ' ', product_units.name_ar) as sku,
    brands.name_ar as brand, 
    categories.name_ar as cat,
    f.wac_p
FROM products 
JOIN brands ON products.brand_id = brands.id 
JOIN categories ON products.category_id = categories.id
JOIN finance.all_cogs f ON f.product_id = products.id 
    AND CONVERT_TIMEZONE('{zone_to_use}', 'Africa/Cairo', CURRENT_TIMESTAMP()) 
        BETWEEN f.from_date AND f.to_date 
JOIN product_units ON product_units.id = products.unit_id 
'''

print("Fetching SKU information and WAC data...")
sku_info = snowflake_query("Egypt", query)
sku_info['product_id'] = pd.to_numeric(sku_info['product_id'])
sku_info['wac_p'] = pd.to_numeric(sku_info['wac_p'])

print(f"✓ Retrieved cost data for {len(sku_info)} SKUs")

## 4. Market Prices

Gather competitive pricing data from multiple sources:
- **Marketplace prices** - Regional marketplace data with fallbacks
- **Ben Soliman prices** - Competitor pricing
- **Scraped prices** - Web-scraped competitor data
- **Product statistics** - Historical margin boundaries

### 4.1 Marketplace Prices


In [None]:
query = f'''
WITH whs as (SELECT *
             FROM   (values
                            ('Cairo', 'El-Marg', 38,700),
                            ('Cairo', 'Mostorod', 1,700),
                            ('Giza', 'Barageel', 236,701),
                            ('Delta West', 'El-Mahala', 337,703),
                            ('Delta West', 'Tanta', 8,703),
                            ('Delta East', 'Mansoura FC', 339,704),
                            ('Delta East', 'Sharqya', 170,704),
                            ('Upper Egypt', 'Assiut FC', 501,1124),
                            ('Upper Egypt', 'Bani sweif', 401,1126),
                            ('Upper Egypt', 'Menya Samalot', 703,1123),
                            ('Upper Egypt', 'Sohag', 632,1125),
                            ('Alexandria', 'Khorshed Alex', 797,702),
							('Giza', 'Sakkarah', 962,701)
							
							)
                    x(region, wh, warehouse_id,cohort_id)),
full_data as (
select products.id as product_id, region,warehouse_id
from products , whs 
where activation = 'true'
),				

MP as (
select region,product_id,
min(min_price) as min_price,
min(max_price) as max_price,
min(mod_price) as mod_price,
min(true_min) as true_min,
min(true_max) as true_max

from (
select mp.region,mp.product_id,mp.pu_id,
min_price/BASIC_UNIT_COUNT as min_price,
max_price/BASIC_UNIT_COUNT as max_price,
mod_price/BASIC_UNIT_COUNT as mod_price,
TRUE_MIN_PRICE/BASIC_UNIT_COUNT as true_min,
TRUE_MAX_PRICE/BASIC_UNIT_COUNT as true_max
from materialized_views.marketplace_prices mp 
join packing_unit_products pup on pup.product_id = mp.product_id and pup.packing_unit_id = mp.pu_id
join finance.all_cogs f on f.product_id = mp.product_id and CURRENT_TIMESTAMP between f.from_date and f.to_date
where  least(min_price,mod_price) between wac_p*0.9 and wac_p*1.3 
)
group by all 
),
region_mapping AS (
    SELECT * 
	FROM 
	(	VALUES
        ('Delta East', 'Delta West'),
        ('Delta West', 'Delta East'),
        ('Alexandria', 'Cairo'),
        ('Alexandria', 'Giza'),
        ('Upper Egypt', 'Cairo'),
        ('Upper Egypt', 'Giza'),
		('Cairo','Giza'),
		('Giza','Cairo'),
		('Delta West', 'Cairo'),
		('Delta East', 'Cairo'),
		('Delta West', 'Giza'),
		('Delta East', 'Giza')
		)
    AS region_mapping(region, fallback_region)
)


select region,warehouse_id,product_id,
min(final_min_price) as final_min_price,
min(final_max_price) as final_max_price,
min(final_mod_price) as final_mod_price,
min(final_true_min) as final_true_min,
min(final_true_max) as final_true_max

from (
SELECT
distinct 
	w.region,
    w.warehouse_id,
	w.product_id,
    COALESCE(m1.min_price, m2.min_price) AS final_min_price,
    COALESCE(m1.max_price, m2.max_price) AS final_max_price,
    COALESCE(m1.mod_price, m2.mod_price) AS final_mod_price,
	COALESCE(m1.true_min, m2.true_min) AS final_true_min,
	COALESCE(m1.true_max, m2.true_max) AS final_true_max,
FROM full_data w
LEFT JOIN MP m1
    ON w.region = m1.region and w.product_id = m1.product_id
JOIN region_mapping rm
    ON w.region = rm.region
LEFT JOIN MP m2
    ON rm.fallback_region = m2.region
   AND w.product_id = m2.product_id
)
where final_min_price is not null 
group by all 
'''

print("Fetching marketplace prices...")
marketplace = snowflake_query("Egypt", query)
marketplace.columns = marketplace.columns.str.lower()

for col in marketplace.columns:
    marketplace[col] = pd.to_numeric(marketplace[col], errors='ignore')

print(f"✓ Retrieved marketplace prices for {len(marketplace)} products")

### 4.2 Ben Soliman (Competitor) Prices


In [None]:
query = f'''
select z.* 
from (
select maxab_product_id as product_id,avg(bs_final_price) as ben_soliman_price
from (
select * , row_number()over(partition by maxab_product_id order by diff) as rnk_2
from (
select *,(bs_final_price-wac_p)/wac_p as diff_2
from (
select * ,bs_price/maxab_basic_unit_count as bs_final_price
from (
select *,row_number()over(partition by maxab_product_id,maxab_pu order by diff) as rnk 
from (
select sm.* ,max(INJECTION_DATE::date)over(partition by maxab_product_id,maxab_pu) as max_date,wac1,wac_p,abs(bs_price-(wac_p*maxab_basic_unit_count))/(wac_p*maxab_basic_unit_count) as diff 
from materialized_views.savvy_mapping sm 
join finance.all_cogs f on f.product_id = sm.maxab_product_id and current_timestamp between f.from_Date and f.to_date
where bs_price is not null 
and INJECTION_DATE::date >= CURRENT_DATE- 5
qualify INJECTION_DATE::date = max_date
)
qualify rnk = 1 
)
)
where diff_2 between -0.5 and 0.5 
)
qualify rnk_2 = 1 
)
group by all
)z 
join finance.all_cogs f on f.product_id = z.product_id and current_timestamp between f.from_Date and f.to_date

where ben_soliman_price between f.wac_p*0.9 and f.wac_p*1.3
'''

print("Fetching Ben Soliman (competitor) prices...")
bensoliman = snowflake_query("Egypt", query)
bensoliman.columns = bensoliman.columns.str.lower()

for col in bensoliman.columns:
    bensoliman[col] = pd.to_numeric(bensoliman[col], errors='ignore')

print(f"✓ Retrieved competitor prices for {len(bensoliman)} products")

### 4.3 Scraped Competitor Prices


In [None]:
query = f'''
WITH whs as (SELECT *
             FROM   (values
                            ('Cairo', 'El-Marg', 38,700),
                            ('Cairo', 'Mostorod', 1,700),
                            ('Giza', 'Barageel', 236,701),
                            ('Delta West', 'El-Mahala', 337,703),
                            ('Delta West', 'Tanta', 8,703),
                            ('Delta East', 'Mansoura FC', 339,704),
                            ('Delta East', 'Sharqya', 170,704),
                            ('Upper Egypt', 'Assiut FC', 501,1124),
                            ('Upper Egypt', 'Bani sweif', 401,1126),
                            ('Upper Egypt', 'Menya Samalot', 703,1123),
                            ('Upper Egypt', 'Sohag', 632,1125),
                            ('Alexandria', 'Khorshed Alex', 797,702),
							('Giza', 'Sakkarah', 962,701)
							
							)
                    x(region, wh, warehouse_id,cohort_id))
select product_id,x.region,warehouse_id,min(MARKET_PRICE) as min_scrapped,max(MARKET_PRICE) as max_scrapped,median(MARKET_PRICE) as median_scrapped
from (
select MATERIALIZED_VIEWS.CLEANED_MARKET_PRICES.*,max(date)over(partition by region,MATERIALIZED_VIEWS.CLEANED_MARKET_PRICES.product_id,competitor) as max_date
from MATERIALIZED_VIEWS.CLEANED_MARKET_PRICES
join finance.all_cogs f on f.product_id = MATERIALIZED_VIEWS.CLEANED_MARKET_PRICES.product_id and CURRENT_TIMESTAMP between f.from_date and f.to_date 
where date>= current_date -5
and MARKET_PRICE between f.wac_p * 0.9 and wac_p*1.3
qualify date = max_date 
) x 
left join whs on whs.region = x.region
group by all 
'''

print("Fetching scraped competitor prices...")
scrapped_prices = snowflake_query("Egypt", query)
scrapped_prices.columns = scrapped_prices.columns.str.lower()

for col in scrapped_prices.columns:
    scrapped_prices[col] = pd.to_numeric(scrapped_prices[col], errors='ignore')

print(f"✓ Retrieved scraped prices for {len(scrapped_prices)} products")

### 4.4 Product Statistics (Margin Boundaries)


In [None]:
query = f'''
SELECT 
    region,
    product_id,
    optimal_bm,
    MIN_BOUNDARY,
    MAX_BOUNDARY,
    MEDIAN_BM
FROM (
    SELECT 
        region,
        product_id,
        target_bm,
        optimal_bm,
        MIN_BOUNDARY,
        MAX_BOUNDARY,
        MEDIAN_BM,
        MAX(created_at) OVER (PARTITION BY product_id, region) as max_date,
        created_at
    FROM materialized_views.PRODUCT_STATISTICS
    WHERE created_at::date >= DATE_TRUNC('month', CURRENT_DATE - 60)
    QUALIFY max_date = created_at
)
'''

print("Fetching product statistics (margin boundaries)...")
stats = snowflake_query("Egypt", query)
stats.columns = stats.columns.str.lower()

for col in stats.columns:
    stats[col] = pd.to_numeric(stats[col], errors='ignore')

print(f"✓ Retrieved margin statistics for {len(stats)} products")

### 4.5 Warehouse-Region Mapping


In [None]:
query = f'''
SELECT warehouse_id, region
FROM (
    SELECT *, ROW_NUMBER() OVER (PARTITION BY warehouse_id ORDER BY nmv DESC) as rnk 
    FROM (
        SELECT 
            CASE WHEN regions.id = 2 THEN cities.name_en ELSE regions.name_en END as region,
            pso.warehouse_id,
            SUM(pso.total_price) as nmv
        FROM product_sales_order pso
        JOIN sales_orders so ON so.id = pso.sales_order_id
        JOIN materialized_views.retailer_polygon ON materialized_views.retailer_polygon.retailer_id = so.retailer_id
        JOIN districts ON districts.id = materialized_views.retailer_polygon.district_id
        JOIN cities ON cities.id = districts.city_id
        JOIN states ON states.id = cities.state_id
        JOIN regions ON regions.id = states.region_id             
        WHERE TRUE
            AND so.created_at::date BETWEEN CURRENT_DATE - 31 AND CURRENT_DATE - 1
            AND so.sales_order_status_id NOT IN (7, 12)
            AND so.channel IN ('telesales', 'retailer')
            AND pso.purchased_item_count <> 0
        GROUP BY ALL
    )
    QUALIFY rnk = 1 
)
'''

print("Fetching warehouse-region mapping...")
warehouse_region = snowflake_query("Egypt", query)
warehouse_region.columns = warehouse_region.columns.str.lower()

for col in warehouse_region.columns:
    warehouse_region[col] = pd.to_numeric(warehouse_region[col], errors='ignore')

print(f"✓ Mapped {len(warehouse_region)} warehouses to regions")

### 4.6 Target Margins (Brand/Category)


In [None]:
# Brand-level target margins
query = f'''
SELECT DISTINCT cat, brand, margin as target_bm
FROM performance.commercial_targets cplan
QUALIFY 
    CASE 
        WHEN DATE_TRUNC('month', MAX(DATE) OVER()) = DATE_TRUNC('month', CURRENT_DATE) 
        THEN DATE_TRUNC('month', CURRENT_DATE)
        ELSE DATE_TRUNC('month', CURRENT_DATE - INTERVAL '1 month') 
    END = DATE_TRUNC('month', date)
'''

print("Fetching brand target margins...")
brand_cat_target = snowflake_query("Egypt", query)
brand_cat_target['target_bm'] = pd.to_numeric(brand_cat_target['target_bm'])
print(f"✓ Retrieved targets for {len(brand_cat_target)} brand-category combinations")

# Category-level weighted target margins
query = f'''
SELECT cat, SUM(target_bm * (target_nmv / cat_total)) as cat_target_margin
FROM (
    SELECT *, SUM(target_nmv) OVER (PARTITION BY cat) as cat_total
    FROM (
        SELECT cat, brand, AVG(target_bm) as target_bm, SUM(target_nmv) as target_nmv
        FROM (
            SELECT DISTINCT 
                date, 
                city as region, 
                cat, 
                brand, 
                margin as target_bm, 
                nmv as target_nmv
            FROM performance.commercial_targets cplan
            QUALIFY 
                CASE 
                    WHEN DATE_TRUNC('month', MAX(DATE) OVER()) = DATE_TRUNC('month', CURRENT_DATE) 
                    THEN DATE_TRUNC('month', CURRENT_DATE)
                    ELSE DATE_TRUNC('month', CURRENT_DATE - INTERVAL '1 month') 
                END = DATE_TRUNC('month', date)
        )
        GROUP BY ALL
    )
)
GROUP BY ALL 
'''

print("Fetching category target margins...")
cat_target = snowflake_query("Egypt", query)
cat_target['cat_target_margin'] = pd.to_numeric(cat_target['cat_target_margin'])
print(f"✓ Retrieved targets for {len(cat_target)} categories")

### 4.7 Merge All Data Sources


In [None]:
# =============================================================================
# MERGE ALL DATA SOURCES
# =============================================================================

print("Merging all data sources...")

# Start with selected products + tier quantities
final_data = selected_products.merge(
    tiers_selection[[
        'warehouse_id', 'product_id', 'packing_unit_id',
        'tier_1_qty', 'tier_2_qty', 'median_qty',
        'tier_1_increase_pct', 'tier_2_increase_pct'
    ]],
    on=['warehouse_id', 'product_id', 'packing_unit_id']
)

# Select relevant columns
final_data = final_data[[
    'warehouse_id', 'product_id', 'packing_unit_id', 'sku', 'brand', 'cat',
    'packing_unit_price', 'basic_unit_count', 
    'tier_1_qty', 'tier_2_qty', 'median_qty',
    'tier_1_increase_pct', 'tier_2_increase_pct', 'final_rank'
]]

# Add WAC (weighted average cost)
final_data = final_data.merge(sku_info[['product_id', 'wac_p']], on='product_id')
final_data['wac_p'] = (final_data['wac_p'] * final_data['basic_unit_count']).round(2)

# Add marketplace prices
final_data = final_data.merge(marketplace, on=['product_id', 'warehouse_id'], how='left')
final_data = final_data.drop(columns='region')

# Add competitor prices
final_data = final_data.merge(bensoliman[['product_id', 'ben_soliman_price']], on=['product_id'], how='left')
final_data = final_data.merge(scrapped_prices, on=['product_id', 'warehouse_id'], how='left')
final_data = final_data.drop(columns='region')

# Add region and margin data
final_data = final_data.merge(warehouse_region, on=['warehouse_id'])
final_data = final_data.merge(stats, on=['product_id', 'region'], how='left')
final_data = final_data.merge(brand_cat_target, on=['brand', 'cat'], how='left')
final_data = final_data.merge(cat_target, on=['cat'], how='left')

# Use brand target margin, fall back to category target margin
final_data['Target_margin'] = final_data['target_bm'].fillna(final_data['cat_target_margin'])

print(f"✓ Merged data: {len(final_data)} products with all pricing data")

### LIVE CART Rules

In [None]:
query = '''
SELECT 
    cppu.cohort_id,
    product_id,
    packing_unit_id,
    basic_unit_count,
    COALESCE(cppu.MAX_PER_SALES_ORDER, cppu2.MAX_PER_SALES_ORDER) as current_cart_rule
FROM COHORT_PRODUCT_PACKING_UNITS cppu 
JOIN PACKING_UNIT_PRODUCTS pup ON cppu.PRODUCT_PACKING_UNIT_ID = pup.id 
JOIN cohorts c ON c.id = cppu.cohort_id
JOIN COHORT_PRODUCT_PACKING_UNITS cppu2 
    ON cppu.PRODUCT_PACKING_UNIT_ID = cppu2.PRODUCT_PACKING_UNIT_ID 
    AND cppu2.cohort_id = c.FALLBACK_COHORT_ID 
WHERE cppu.cohort_id IN (700, 701, 702, 703, 704, 1123, 1124, 1125, 1126)
'''

print("Fetching live cart rules...")
live_cart_rules = snowflake_query("Egypt", query) 
live_cart_rules.columns = live_cart_rules.columns.str.lower()

for col in live_cart_rules.columns:
    live_cart_rules[col] = pd.to_numeric(live_cart_rules[col], errors='ignore')

print(f"✓ Retrieved {len(live_cart_rules)} cart rules")

Unnamed: 0,cohort_id,product_id,packing_unit_id,basic_unit_count,current_cart_rule
0,1124,8494,1,6,25
1,1123,11148,1,1,10
2,1126,6587,1,1,25
3,1123,10026,1,6,10
4,1125,13060,3,1,25
...,...,...,...,...,...
108945,700,5901,1,48,25
108946,701,9340,2,1,5
108947,701,7892,1,12,25
108948,700,8956,1,1,25


In [None]:
# Cohort to Warehouse mapping
mapping_coh_wh = pd.DataFrame({
    'region':       ['Cairo', 'Cairo', 'Giza', 'Delta West', 'Delta West', 'Delta East', 
                     'Delta East', 'Upper Egypt', 'Upper Egypt', 'Upper Egypt', 'Upper Egypt', 
                     'Alexandria', 'Giza'],
    'wh':           ['El-Marg', 'Mostorod', 'Barageel', 'El-Mahala', 'Tanta', 'Mansoura FC',
                     'Sharqya', 'Assiut FC', 'Bani sweif', 'Menya Samalot', 'Sohag',
                     'Khorshed Alex', 'Sakkarah'],
    'warehouse_id': [38, 1, 236, 337, 8, 339, 170, 501, 401, 703, 632, 797, 962],
    'cohort_id':    [700, 700, 701, 703, 703, 704, 704, 1124, 1126, 1123, 1125, 702, 701]
})


Unnamed: 0,region,wh,warehouse_id,cohort_id
0,Cairo,El-Marg,38,700
1,Cairo,Mostorod,1,700
2,Giza,Barageel,236,701
3,Delta West,El-Mahala,337,703
4,Delta West,Tanta,8,703
5,Delta East,Mansoura FC,339,704
6,Delta East,Sharqya,170,704
7,Upper Egypt,Assiut FC,501,1124
8,Upper Egypt,Bani sweif,401,1126
9,Upper Egypt,Menya Samalot,703,1123


In [None]:
# Add warehouse mapping to cart rules
live_cart_rules = live_cart_rules.merge(mapping_coh_wh, on='cohort_id')
print(f"✓ Cart rules mapped to {live_cart_rules['warehouse_id'].nunique()} warehouses")

Unnamed: 0,cohort_id,product_id,packing_unit_id,basic_unit_count,current_cart_rule,region,wh,warehouse_id
0,1124,8494,1,6,25,Upper Egypt,Assiut FC,501
1,1123,11148,1,1,10,Upper Egypt,Menya Samalot,703
2,1126,6587,1,1,25,Upper Egypt,Bani sweif,401
3,1123,10026,1,6,10,Upper Egypt,Menya Samalot,703
4,1125,13060,3,1,25,Upper Egypt,Sohag,632
...,...,...,...,...,...,...,...,...
157387,701,7892,1,12,25,Giza,Sakkarah,962
157388,700,8956,1,1,25,Cairo,El-Marg,38
157389,700,8956,1,1,25,Cairo,Mostorod,1
157390,700,3764,1,24,25,Cairo,El-Marg,38


## 5. Price Tier Calculation

Calculate tier 1 and tier 2 prices with constraints:
- **Max discount**: 5% from current price
- **Min discount**: 0.35% from current price  
- **Ratio bounds**: discount-to-quantity ratio between 1.3 and 3.5
- **Price ordering**: WAC < Tier 2 < Tier 1 < Current Price


### 5.1 Price Calculation Functions

The `calculate_tier_prices` function uses multiple strategies:
1. **Market prices strategy** - Use competitive pricing data if available
2. **Margin range strategy** - Calculate from margin boundaries if no market data
3. **Ratio adjustment** - Adjust tier_2 price to meet discount-to-quantity ratio bounds


In [None]:
def calculate_tier_prices(row, max_discount_pct=5.0, min_discount_pct=0.35, min_ratio=1.1, max_ratio=3.5):
    """
    Calculate tier 1 and tier 2 prices for a single row.
    
    Parameters:
    - max_discount_pct: Maximum allowed discount from current price (default: 5%)
    - min_discount_pct: Minimum required discount from current price (default: 0.35%)
    - min_ratio: Minimum discount-to-quantity ratio (default: 1.3)
    - max_ratio: Maximum discount-to-quantity ratio (default: 3.5)
    
    Constraints:
    - Tier prices must not go below price calculated with 0.3 * target_margin
    - Ensure: WAC < Tier 2 < Tier 1 < Current Price
    - Ensure: BOTH tiers must be valid or BOTH are None
    - Ensure: discount_qty_ratio = (tier_2_discount/tier_1_discount) / (tier_2_qty/tier_1_qty) is between min_ratio and max_ratio
    """
    
    current_price = row['packing_unit_price']
    wac = row['wac_p']
    
    # Get basic_unit_count for converting market prices
    basic_unit_count = row.get('basic_unit_count', 1)
    if pd.isna(basic_unit_count) or basic_unit_count <= 0:
        basic_unit_count = 1
    
    # Validation
    if pd.isna(current_price) or current_price <= 0:
        return pd.Series({'tier_1_price': np.nan, 'tier_2_price': np.nan, 'price_source': 'invalid_current_price'})
    
    if pd.isna(wac) or wac <= 0:
        return pd.Series({'tier_1_price': np.nan, 'tier_2_price': np.nan, 'price_source': 'invalid_wac'})
    
    if current_price <= wac:
        return pd.Series({'tier_1_price': np.nan, 'tier_2_price': np.nan, 'price_source': 'current_price_below_wac'})
    
    # Calculate discount bounds
    max_discount_price = current_price * (1 - max_discount_pct / 100)  # Minimum allowed price
    min_discount_price = current_price * (1 - min_discount_pct / 100)  # Maximum allowed price
    
    # Calculate absolute minimum price based on target_margin
    # Price must maintain at least 30% of target margin
    absolute_min_price = wac  # Default to WAC if no target_margin
    
    if 'target_margin' in row.index and pd.notna(row['target_margin']) and 0 < row['target_margin'] < 1:
        target_margin = row['target_margin']
        # Minimum margin is 30% of target margin
        min_margin = target_margin * 0.3
        # Calculate minimum price: price = wac / (1 - min_margin)
        absolute_min_price = wac / (1 - min_margin)
    else:
        # Fallback: use wac_cushion_pct
        wac_cushion_pct = 0.25
        absolute_min_price = wac / (1 - (wac_cushion_pct / 100))
    
    # Market price columns (these are per basic unit)
    market_cols = [
        'final_mod_price', 'median_scrapped', 'final_max_price', 
        'ben_soliman_price', 'max_scrapped', 'final_true_max',
        'final_min_price', 'min_scrapped', 'final_true_min'
    ]
    
    # Extract valid market prices (multiply by basic_unit_count, above absolute_min_price, within discount bounds)
    valid_market_prices = []
    for col in market_cols:
        if col in row.index and pd.notna(row[col]) and row[col] > 0:
            # Convert basic unit price to packing unit price
            packing_price = row[col] * basic_unit_count
            
            # Must be: above absolute_min_price AND within discount bounds
            if absolute_min_price < packing_price and max_discount_price <= packing_price <= min_discount_price:
                valid_market_prices.append(packing_price)
    
    # Remove duplicates and sort descending
    valid_market_prices = sorted(list(set(valid_market_prices)), reverse=True)
    
    tier_1 = None
    tier_2 = None
    source = ''
    
    min_gap_pct = 0.25
    
    # Strategy 1: Use market prices
    if len(valid_market_prices) >= 3:
        # Select from available prices
        tier_1 = valid_market_prices[0]  # Highest price
        
        # Find tier 2 with minimum gap
        for price in valid_market_prices[1:]:
            if price < tier_1 * (1 - min_gap_pct / 100):
                tier_2 = price
                break
        
        # If no suitable tier 2 found, take second highest
        if tier_2 is None and len(valid_market_prices) > 1:
            tier_2 = valid_market_prices[1]
        
        if tier_1 is not None and tier_2 is not None:
            source = 'market_prices'
    
    elif len(valid_market_prices) == 2:
        tier_1 = valid_market_prices[0]
        tier_2 = valid_market_prices[1]
        source = 'market_prices'
    
    elif len(valid_market_prices) == 1:
        # Only one market price - use margin range for the other
        market_price = valid_market_prices[0]
        
        # Calculate which tier this should be based on its position
        price_position = (market_price - max_discount_price) / (min_discount_price - max_discount_price)
        
        # If in upper half (>0.5), use as tier 1 and calculate tier 2
        # If in lower half (<=0.5), use as tier 2 and calculate tier 1
        if price_position > 0.5:
            tier_1 = market_price
            tier_2 = calculate_from_margin_range(row, wac, current_price, tier_1, tier=2, 
                                                 max_discount_price=max_discount_price,
                                                 min_discount_price=min_discount_price,
                                                 absolute_min_price=absolute_min_price)
            if tier_2 is not None:
                source = 'market_tier1_margin_tier2'
        else:
            tier_2 = market_price
            tier_1 = calculate_from_margin_range(row, wac, current_price, tier_2, tier=1,
                                                 max_discount_price=max_discount_price,
                                                 min_discount_price=min_discount_price,
                                                 absolute_min_price=absolute_min_price)
            if tier_1 is not None:
                source = 'margin_tier1_market_tier2'
    
    # Strategy 2: No market prices - use margin range method
    if tier_1 is None or tier_2 is None:
        tier_1, tier_2 = calculate_both_from_margin_range(row, wac, current_price,
                                                          max_discount_price=max_discount_price,
                                                          min_discount_price=min_discount_price,
                                                          absolute_min_price=absolute_min_price)
        if tier_1 is not None and tier_2 is not None:
            source = 'margin_range_based'
    
    # CRITICAL: Final validation - BOTH must be valid or BOTH are None
    if tier_1 is not None and tier_2 is not None:
        # Ensure correct ordering
        if tier_2 >= tier_1:
            tier_1, tier_2 = max(tier_1, tier_2), min(tier_1, tier_2)
        
        # Apply discount bounds
        tier_1 = max(tier_1, max_discount_price)
        tier_1 = min(tier_1, min_discount_price)
        tier_2 = max(tier_2, max_discount_price)
        tier_2 = min(tier_2, min_discount_price)
        
        # Check if both above absolute minimum price
        if tier_1 <= absolute_min_price or tier_2 <= absolute_min_price:
            tier_1 = None
            tier_2 = None
            source = 'prices_below_minimum_margin'
        else:
            # Ensure minimum gap between tiers
            if tier_2 > tier_1 * (1 - min_gap_pct / 100):
                tier_2 = tier_1 * (1 - min_gap_pct / 100)
                if tier_2 <= absolute_min_price:
                    tier_1 = None
                    tier_2 = None
                    source = 'insufficient_gap_between_tiers'
            
            # Final check: both still valid?
            if tier_1 is not None and tier_2 is not None:
                if not (wac < tier_2 < tier_1 < current_price):
                    tier_1 = None
                    tier_2 = None
                    source = 'invalid_tier_ordering'
                elif not (max_discount_price <= tier_2 and tier_1 <= min_discount_price):
                    tier_1 = None
                    tier_2 = None
                    source = 'tiers_outside_discount_bounds'
                else:
                    tier_1 = round(tier_1, 2)
                    tier_2 = round(tier_2, 2)
                    
                    # Validate and adjust discount-to-quantity ratio
                    tier_1_qty = row.get('tier_1_qty', None)
                    tier_2_qty = row.get('tier_2_qty', None)
                    
                    if tier_1_qty is not None and tier_2_qty is not None and tier_1_qty > 0:
                        tier_1_discount = current_price - tier_1
                        tier_2_discount = current_price - tier_2
                        
                        if tier_1_discount > 0:
                            diff_quantity = tier_2_qty / tier_1_qty
                            diff_discount = tier_2_discount / tier_1_discount
                            
                            if diff_quantity > 0:
                                discount_qty_ratio = diff_discount / diff_quantity
                                
                                # Adjust tier_2_price if ratio is outside bounds
                                if discount_qty_ratio < min_ratio:
                                    # Ratio too low - need more discount at tier 2
                                    # tier_2 = current_price - (target_ratio * diff_quantity * tier_1_discount)
                                    target_tier_2_discount = min_ratio * diff_quantity * tier_1_discount
                                    adjusted_tier_2 = current_price - target_tier_2_discount
                                    
                                    # Ensure adjusted price is still valid (above WAC and absolute_min_price)
                                    if adjusted_tier_2 > wac and adjusted_tier_2 > absolute_min_price and adjusted_tier_2 < tier_1:
                                        tier_2 = round(adjusted_tier_2, 2)
                                        source = source + '_ratio_adjusted_up'
                                    else:
                                        tier_1 = None
                                        tier_2 = None
                                        source = f'cannot_adjust_ratio_{discount_qty_ratio:.2f}_min_bound'
                                
                                elif discount_qty_ratio > max_ratio:
                                    # Ratio too high - need less discount at tier 2
                                    # tier_2 = current_price - (target_ratio * diff_quantity * tier_1_discount)
                                    target_tier_2_discount = max_ratio * diff_quantity * tier_1_discount
                                    adjusted_tier_2 = current_price - target_tier_2_discount
                                    
                                    # Ensure adjusted price is still valid (below tier_1 and above WAC)
                                    if adjusted_tier_2 > wac and adjusted_tier_2 > absolute_min_price and adjusted_tier_2 < tier_1:
                                        tier_2 = round(adjusted_tier_2, 2)
                                        source = source + '_ratio_adjusted_down'
                                    else:
                                        tier_1 = None
                                        tier_2 = None
                                        source = f'cannot_adjust_ratio_{discount_qty_ratio:.2f}_max_bound'
    
    # FINAL CHECK: If only one tier exists, invalidate both
    if (tier_1 is None and tier_2 is not None) or (tier_1 is not None and tier_2 is None):
        tier_1 = None
        tier_2 = None
        source = 'incomplete_tier_pair'
    
    # If both are None and no source set, mark it
    if tier_1 is None and tier_2 is None and source == '':
        source = 'no_valid_prices'
    
    return pd.Series({
        'tier_1_price': tier_1,
        'tier_2_price': tier_2,
        'price_source': source
    })


def calculate_both_from_margin_range(row, wac, current_price, max_discount_price, min_discount_price, absolute_min_price):
    """
    Calculate both tier prices using margin range from minimum of (min_boundary, optimal_bm) to current margin.
    Returns (tier_1_price, tier_2_price) or (None, None)
    """
    
    # Calculate current margin: margin = (price - wac) / price
    current_margin = (current_price - wac) / current_price
    
    # Get min_boundary margin
    min_boundary_margin = None
    if 'min_boundary' in row.index and pd.notna(row['min_boundary']) and 0 < row['min_boundary'] < 1:
        min_boundary_margin = row['min_boundary']
    
    # Get optimal_bm margin
    optimal_margin = None
    if 'optimal_bm' in row.index and pd.notna(row['optimal_bm']) and 0 < row['optimal_bm'] < 1:
        optimal_margin = row['optimal_bm']
    
    # Determine starting margin: minimum of (min_boundary, optimal_bm)
    start_margin = None
    
    if min_boundary_margin is not None and optimal_margin is not None:
        start_margin = min(min_boundary_margin, optimal_margin)
    elif min_boundary_margin is not None:
        start_margin = min_boundary_margin
    elif optimal_margin is not None:
        start_margin = optimal_margin
    else:
        # Fallback: use 50% of current margin
        start_margin = current_margin * 0.85
    
    # Ensure start_margin is less than current margin
    if start_margin >= current_margin:
        start_margin = current_margin * 0.85
    
    # Generate margin points in the range (10 points)
    num_points = 10
    margin_range = np.linspace(start_margin, current_margin, num_points)
    
    # Calculate prices from these margins: price = wac / (1 - margin)
    price_candidates = []
    for margin in margin_range:
        if 0 < margin < 1:
            price = wac / (1 - margin)
            # Only keep prices within discount bounds and above absolute_min_price
            if absolute_min_price < price and max_discount_price <= price <= min_discount_price:
                price_candidates.append(price)
    
    if len(price_candidates) < 2:
        return None, None
    
    # Sort prices descending
    price_candidates = sorted(price_candidates, reverse=True)
    
    # Select Tier 1: closer to the top (less discount)
    # Select Tier 2: further down (more discount)
    tier_1_idx = int(len(price_candidates) * 0.25)  # 25% from top
    tier_2_idx = int(len(price_candidates) * 0.65)  # 65% from top
    
    # Ensure valid indices
    tier_1_idx = max(0, min(tier_1_idx, len(price_candidates) - 2))
    tier_2_idx = max(tier_1_idx + 1, min(tier_2_idx, len(price_candidates) - 1))
    
    tier_1 = price_candidates[tier_1_idx]
    tier_2 = price_candidates[tier_2_idx]
    
    # Ensure meaningful gap (at least 0.5%)
    min_gap_pct = 0.25
    if tier_2 > tier_1 * (1 - min_gap_pct / 100):
        # Try to find better tier_2
        for i in range(tier_2_idx + 1, len(price_candidates)):
            if price_candidates[i] < tier_1 * (1 - min_gap_pct / 100):
                tier_2 = price_candidates[i]
                break
    
    # Final validation
    if tier_2 >= tier_1 or tier_1 <= absolute_min_price or tier_2 <= absolute_min_price:
        return None, None
    
    return tier_1, tier_2


def calculate_from_margin_range(row, wac, current_price, other_tier_price, tier, 
                                max_discount_price, min_discount_price, absolute_min_price):
    """
    Calculate single tier price using margin range.
    Used when one tier is from market and we need to calculate the other.
    """
    
    # Calculate current margin
    current_margin = (current_price - wac) / current_price
    
    # Get min_boundary margin
    min_boundary_margin = None
    if 'min_boundary' in row.index and pd.notna(row['min_boundary']) and 0 < row['min_boundary'] < 1:
        min_boundary_margin = row['min_boundary']
    
    # Get optimal_bm margin
    optimal_margin = None
    if 'optimal_bm' in row.index and pd.notna(row['optimal_bm']) and 0 < row['optimal_bm'] < 1:
        optimal_margin = row['optimal_bm']
    
    # Determine starting margin: minimum of (min_boundary, optimal_bm)
    start_margin = None
    
    if min_boundary_margin is not None and optimal_margin is not None:
        start_margin = min(min_boundary_margin, optimal_margin)
    elif min_boundary_margin is not None:
        start_margin = min_boundary_margin
    elif optimal_margin is not None:
        start_margin = optimal_margin
    else:
        start_margin = current_margin * 0.5
    
    # Ensure start_margin is less than current margin
    if start_margin >= current_margin:
        start_margin = current_margin * 0.7
    
    # Generate margin range (10 points)
    num_points = 10
    margin_range = np.linspace(start_margin, current_margin, num_points)
    
    # Calculate prices
    price_candidates = []
    for margin in margin_range:
        if 0 < margin < 1:
            price = wac / (1 - margin)
            if absolute_min_price < price and max_discount_price <= price <= min_discount_price:
                price_candidates.append(price)
    
    if len(price_candidates) == 0:
        return None
    
    # Sort prices descending
    price_candidates = sorted(price_candidates, reverse=True)
    
    min_gap_pct = 0.5
    
    if tier == 1:
        # Need tier 1 (higher price), we have tier 2 (lower price)
        # Find prices above tier 2 with proper gap
        target_candidates = [p for p in price_candidates 
                           if p > other_tier_price * (1 + min_gap_pct / 100)]
        if target_candidates:
            # Take from upper portion (25% position)
            idx = int(len(target_candidates) * 0.25)
            return target_candidates[idx]
        return None
    
    else:
        # Need tier 2 (lower price), we have tier 1 (higher price)
        # Find prices below tier 1 with proper gap
        target_candidates = [p for p in price_candidates 
                           if p < other_tier_price * (1 - min_gap_pct / 100)]
        if target_candidates:
            # Take from lower portion (65% position)
            idx = int(len(target_candidates) * 0.65)
            idx = min(idx, len(target_candidates) - 1)
            return target_candidates[idx]
        return None


### 5.2 Apply Price Calculations


In [None]:
# =============================================================================
# APPLY PRICE CALCULATIONS
# =============================================================================

# Normalize column names
final_data.columns = final_data.columns.str.lower()

print(f"Processing {len(final_data)} SKUs...")
print(f"Parameters: MAX_DISCOUNT={MAX_DISCOUNT_PCT}%, MIN_DISCOUNT={MIN_DISCOUNT_PCT}%, RATIO=[{MIN_RATIO}, {MAX_RATIO}]")

# Apply price calculation to each row
result = final_data.apply(
    lambda row: calculate_tier_prices(
        row, 
        max_discount_pct=MAX_DISCOUNT_PCT,
        min_discount_pct=MIN_DISCOUNT_PCT,
        min_ratio=MIN_RATIO,
        max_ratio=MAX_RATIO
    ), 
    axis=1
)

# Merge results back to dataframe
final_data = pd.concat([final_data, result], axis=1)

# Summary of ratio adjustments
ratio_adjusted_up = final_data['price_source'].str.contains('ratio_adjusted_up', na=False).sum()
ratio_adjusted_down = final_data['price_source'].str.contains('ratio_adjusted_down', na=False).sum()
cannot_adjust = final_data['price_source'].str.contains('cannot_adjust_ratio', na=False).sum()

print(f"\n--- Ratio Adjustment Summary ---")
print(f"  Adjusted up (was below {MIN_RATIO}):      {ratio_adjusted_up} SKUs")
print(f"  Adjusted down (was above {MAX_RATIO}):    {ratio_adjusted_down} SKUs")
print(f"  Could not adjust (constraints violated): {cannot_adjust} SKUs")

# Filter to only products with valid tier prices
final_data = final_data[
    (~final_data['tier_1_price'].isna()) & 
    (~final_data['tier_2_price'].isna())
]

print(f"\n✓ Final SKUs with valid tier prices: {len(final_data)}")

Processing 2400 SKUs...
Ratio adjusted up (was below 1.1): 1362 SKUs
Ratio adjusted down (was above 3): 46 SKUs
Could not adjust (constraints violated): 61 SKUs
Final SKUs with valid tier prices: 1963


## 6. Wholesale Pricing

Calculate wholesale prices based on:
- Vehicle capacity (quarter truck)
- Rank-based margin tiers (20%, 25%, 40%, 60% of target margin)
- Must be below tier_2_price


In [None]:
# =============================================================================
# PREPARE DELIVERY FEE DATA
# =============================================================================

# Set delivery fees based on region
final_data['delivery_fees'] = DELIVERY_FEE_OTHER
final_data.loc[final_data['region'].isin(['Cairo', 'Giza']), 'delivery_fees'] = DELIVERY_FEE_CAIRO_GIZA

# Prepare query data for wholesale calculation
query_data = final_data[['warehouse_id', 'product_id', 'packing_unit_id', 'delivery_fees']].values.tolist()
query_info = ','.join([
    f"({int(wh_id)}, {int(prod_id)}, {int(pu_id)}, {int(delivery_fees)})" 
    for wh_id, prod_id, pu_id, delivery_fees in query_data
])

print(f"✓ Prepared {len(query_data)} products for wholesale calculation")

In [None]:
query = f'''
with chosen_products as (
select *
from (
values 
{query_info}
)x(warehouse_id,product_id,packing_unit_id,delivery_fees)

),
vec as (
select  vt.id as vehicle_id,name_en as vehicle_name,vc.weight as vehicle_weight,vc.cbm as vehicle_cbm,900 as vehicle_cost
from VEHICLE_TYPES  vt 
join  RETOOL.VEHICLE_CAPACITIES vc on vc.vehicle_id = vt.id
where vehicle_id = 1
),
selected_products as (
select x.*,	(long*width*height)/1000000 AS cbm,weight/1000 AS weight,
from chosen_products x
join packing_unit_products on x.product_id = packing_unit_products.product_id and packing_unit_products.packing_unit_id = x.packing_unit_id
),
main_cte as (
select warehouse_id,product_id,packing_unit_id,delivery_fees,
ceil(least(quart_dababa_wht,quart_dababa_cbm)) as quart_dababa,
vehicle_cost
from (
select * ,
((vehicle_weight*0.9)/4)/weight as quart_dababa_wht , 
((vehicle_cbm*0.9)/4)/cbm as quart_dababa_cbm  
from (
select selected_products.*, vehicle_weight,vehicle_cbm,vehicle_cost
from selected_products,vec
)
)
)
select mc.*, f.wac_p , 
(f.wac_p*quart_dababa)+(((vehicle_cost-(delivery_fees*4))*0.9)/4) as quart_cost,
quart_cost/quart_dababa as unit_cost


from main_cte mc 
join finance.all_cogs f on f.product_id = mc.product_id and CURRENT_TIMEstamp between from_date and to_date 

'''

print("Fetching wholesale cost data (quarter truck calculations)...")
ws_data = snowflake_query("Egypt", query)
ws_data.columns = ws_data.columns.str.lower()

for col in ws_data.columns:
    ws_data[col] = pd.to_numeric(ws_data[col], errors='ignore')

# Select and rename columns
ws_data = ws_data[['warehouse_id', 'product_id', 'packing_unit_id', 'quart_dababa', 'unit_cost']]
ws_data.columns = ['warehouse_id', 'product_id', 'packing_unit_id', 'WS_tier', 'WS_wac']

print(f"✓ Calculated wholesale data for {len(ws_data)} products")

In [None]:
# =============================================================================
# LOAD FORCED BRANDS/CATEGORIES FROM GOOGLE SHEETS
# =============================================================================

scope = [
    "https://spreadsheets.google.com/feeds",
    "https://www.googleapis.com/auth/spreadsheets",
    "https://www.googleapis.com/auth/drive.file",
    "https://www.googleapis.com/auth/drive"
]

creds = ServiceAccountCredentials.from_json_keyfile_dict(
    json.loads(setup_environment_2.get_secret("prod/maxab-sheets")), 
    scope
)
client = gspread.authorize(creds)

# Load forced brands and categories
force_brands = client.open('Wholesales_exec').worksheet('brands')
force_cats = client.open('Wholesales_exec').worksheet('cats')
force_brands_df = pd.DataFrame(force_brands.get_all_records())
force_cats_df = pd.DataFrame(force_cats.get_all_records())

# Extract unique lists
forced_brand_list = force_brands_df.brand.unique() if not force_brands_df.empty else []
forced_cat_list = force_cats_df.cat.unique() if not force_cats_df.empty else []

print(f"✓ Loaded {len(forced_brand_list)} forced brands, {len(forced_cat_list)} forced categories")

In [None]:
query = '''
SELECT product_id, new_pp, forecasted_date
FROM materialized_views.DBDP_PRICE_UPS
WHERE region = 'Cairo'
'''

print("Fetching price-up forecasts...")
price_ups = snowflake_query("Egypt", query)
price_ups.columns = price_ups.columns.str.lower()

for col in price_ups.columns:
    price_ups[col] = pd.to_numeric(price_ups[col], errors='ignore')

print(f"✓ Retrieved {len(price_ups)} price-up forecasts")

In [None]:
# Merge wholesale data and price-ups with final data
final_data = final_data.merge(ws_data, on=['warehouse_id', 'product_id', 'packing_unit_id'], how='left')
final_data['WS_wac'] = final_data['WS_wac'] * final_data['basic_unit_count']
final_data = final_data.merge(price_ups, on='product_id', how='left')

print(f"✓ Added wholesale and price-up data to {len(final_data)} products")

In [50]:
def wholesales_margin(x):
    """
    Calculate wholesale price based on margins and product tiers.
    """
    # Extract key variables
    wac = x['WS_wac']
    target_margin = x['target_margin']
    tier_2_price = x['tier_2_price']
    final_rank = x['final_rank']
    new_pp = x['new_pp']
    brand = x['brand']
    category = x['cat']
    margin = ((x['packing_unit_price'] - x['wac_p']) / x['packing_unit_price'])
    
    # Update target margin if new_pp exists
    if not pd.isna(new_pp):
        target_margin =  margin* 0.9
    
    # Define constants
    MIN_MARGIN = 0.01
    TOTAL_RANKS = 133
    
    # Special brand handling
    if brand in forced_brand_list:
        return _calculate_forced_brand_price(x, wac, target_margin)
    
    # Fiori brand special case
    if brand == 'فيوري':
        return wac / (1 - (margin * 0.9))
    
    # Paper products special case
    if category == 'ورقيات':
        margin = np.minimum(np.maximum(0.6 * target_margin, 0.015), target_margin)
        return wac / (1 - margin)
    
    # Standard tier-based pricing
    tier = _determine_tier(final_rank, TOTAL_RANKS)
    price = _calculate_tier_price(wac, target_margin, tier)
    
    # Adjust if price exceeds tier 2 price
    if price >= tier_2_price:
        price = (wac + tier_2_price) / 2
    
    # Ensure minimum margin
    return np.maximum(price, wac / (1 - MIN_MARGIN))


def _calculate_forced_brand_price(x, wac, target_margin):
    """Calculate price for forced brands with special margin rules."""
    brand = x['brand']
    margin = ((x['packing_unit_price'] - x['wac_p']) / x['packing_unit_price'])
    min_target = 0.25 * target_margin
    
    if brand in ['كوكا كولا', 'شويبس']:
        return np.maximum(wac / (1 - (margin * 0.65)), min_target)
    elif brand == 'جود كير':
        return np.maximum(wac / (1 - (margin * 0.5)), min_target)
    else:
        return wac / (1 - (margin * 0.8))


def _determine_tier(rank, total_ranks):
    """Determine product tier based on ranking."""
    if rank <= 0.25 * total_ranks:
        return 1
    elif rank <= 0.5 * total_ranks:
        return 2
    elif rank <= 0.75 * total_ranks:
        return 3
    else:
        return 4


def _calculate_tier_price(wac, target_margin, tier):
    """Calculate price based on tier with appropriate margin adjustments."""
    tier_config = {
        1: {'multiplier': 0.2, 'min_margin': 0.01},
        2: {'multiplier': 0.25, 'min_margin': 0.015},
        3: {'multiplier': 0.4, 'min_margin': 0.015},
        4: {'multiplier': 0.6, 'min_margin': 0.015}
    }
    
    config = tier_config[tier]
    adjusted_margin = config['multiplier'] * target_margin
    margin = np.minimum(np.maximum(adjusted_margin, config['min_margin']), target_margin)
    
    return wac / (1 - margin)

In [None]:
# Calculate wholesale prices
print("Calculating wholesale prices...")
final_data['WS_price'] = final_data.apply(wholesales_margin, axis=1)

# Validate: WS price must be below tier 2 price
final_data['valid'] = final_data['WS_price'] < final_data['tier_2_price']
final_data.loc[final_data['valid'] == False, 'WS_price'] = np.nan

valid_ws = final_data['WS_price'].notna().sum()
print(f"✓ Valid wholesale prices: {valid_ws} / {len(final_data)}")

### 6.2 Wholesale NEW Logic (Delivery Savings Based)

New wholesale pricing based on delivery cost savings:
- **Car cost**: 900 EGP per delivery
- **Car capacity**: 1.8 tons max
- **Max ticket size**: 50,000 EGP
- **Logic**: If retailer orders multiples of average ticket size, they save deliveries
  - 2x avg TS = 1 delivery saved → discount = delivery cost savings
  - 3x avg TS = 2 deliveries saved → more discount
- **Goal**: Find optimal quantity that gives retailer max savings while price stays above WAC


In [52]:
# =============================================================================
# WHOLESALE NEW LOGIC - Configuration
# =============================================================================
WS_CAR_COST = 1100           # Cost per delivery (EGP)
WS_CAR_CAPACITY_TONS = 1.8  # Max car capacity in tons
WS_MAX_TICKET_SIZE = 40000  # Maximum ticket size (EGP)
WS_MIN_MARGIN = 0.01        # Minimum margin (1%) above WAC

# Query to get average ticket size per warehouse
query = f'''
WITH base AS (
    SELECT *, ROW_NUMBER() OVER (PARTITION BY retailer_id ORDER BY priority) as rnk 
    FROM (
        SELECT x.*, TAGGABLE_ID as retailer_id 
        FROM (
            SELECT id as cohort_id, name as cohort_name, priority, dynamic_tag_id 
            FROM cohorts 
            WHERE is_active = 'true'
                AND id IN (700,701,702,703,704,1123,1124,1125,1126)
        ) x 
        JOIN DYNAMIC_TAGgables dt ON x.dynamic_tag_id = dt.dynamic_tag_id
        WHERE dt.taggable_id IN (
            SELECT taggable_id FROM DYNAMIC_TAGgables 
            WHERE dynamic_tag_id IN (2807, 2808, 2809, 2810, 2811, 2812)
        )
    )
    QUALIFY rnk = 1 
),

-- Map regions to warehouses
whs AS (
    SELECT * FROM (VALUES
        ('Cairo', 'El-Marg', 38),
        ('Cairo', 'Mostorod', 1),
        ('Giza', 'Barageel', 236),
        ('Giza', 'Sakkarah', 962),
        ('Delta West', 'El-Mahala', 337),
        ('Delta West', 'Tanta', 8),
        ('Delta East', 'Mansoura FC', 339),
        ('Delta East', 'Sharqya', 170),
        ('Upper Egypt', 'Assiut FC', 501),
        ('Upper Egypt', 'Bani sweif', 401),
        ('Upper Egypt', 'Menya Samalot', 703),
        ('Upper Egypt', 'Sohag', 632),
        ('Alexandria', 'Khorshed Alex', 797)
    ) x(region_name, wh, warehouse_id)
),

-- Get ticket sizes (order values) for last 4 months
ticket_sizes AS (
    SELECT 
        whs.warehouse_id,
        whs.wh as warehouse_name,
        so.parent_sales_order_id,
        so.retailer_id,
        SUM(pso.total_price) as ticket_size,
        SUM(pso.purchased_item_count * pup.weight / 1000) as order_weight_kg
    FROM product_sales_order pso
    JOIN sales_orders so ON so.id = pso.sales_order_id
    JOIN base ON base.retailer_id = so.retailer_id
    JOIN packing_unit_products pup ON pup.product_id = pso.product_id 
        AND pup.packing_unit_id = pso.packing_unit_id
    JOIN materialized_views.retailer_polygon rp ON rp.retailer_id = so.retailer_id
    JOIN districts ON districts.id = rp.district_id
    JOIN cities ON cities.id = districts.city_id
    JOIN states ON states.id = cities.state_id
    JOIN regions ON regions.id = states.region_id
    JOIN whs ON whs.region_name = CASE WHEN regions.id = 2 THEN states.name_en ELSE regions.name_en END
    WHERE so.created_at::date BETWEEN DATE_TRUNC('month', CURRENT_DATE - INTERVAL '4 months') AND CURRENT_DATE - 1
        AND so.sales_order_status_id NOT IN (7, 12)
        AND so.channel IN ('telesales', 'retailer')
        AND pso.purchased_item_count > 0
    GROUP BY whs.warehouse_id, whs.wh, so.parent_sales_order_id, so.retailer_id
),

-- Calculate warehouse-level statistics
warehouse_stats AS (
    SELECT 
        warehouse_id,
        warehouse_name,
        COUNT(DISTINCT parent_sales_order_id) as total_orders,
        COUNT(DISTINCT retailer_id) as total_retailers,
        AVG(ticket_size) as avg_ticket_size,
        MEDIAN(ticket_size) as median_ticket_size,
        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY ticket_size) as p75_ticket_size,
        PERCENTILE_CONT(0.90) WITHIN GROUP (ORDER BY ticket_size) as p90_ticket_size,
        MAX(ticket_size) as max_ticket_size,
        AVG(order_weight_kg) as avg_order_weight_kg,
        MEDIAN(order_weight_kg) as median_order_weight_kg
    FROM ticket_sizes
    WHERE ticket_size > 0
    GROUP BY warehouse_id, warehouse_name
)

SELECT 
    warehouse_id,
    warehouse_name,
    total_orders,
    total_retailers,
    ROUND(avg_ticket_size, 2) as avg_ticket_size,
    ROUND(median_ticket_size, 2) as median_ticket_size,
    ROUND(p75_ticket_size, 2) as p75_ticket_size,
    ROUND(p90_ticket_size, 2) as p90_ticket_size,
    ROUND(max_ticket_size, 2) as max_ticket_size,
    ROUND(avg_order_weight_kg, 2) as avg_order_weight_kg,
    ROUND(median_order_weight_kg, 2) as median_order_weight_kg,
    -- Calculate how many orders fit in one car based on weight
    ROUND({WS_CAR_CAPACITY_TONS * 1000} / NULLIF(avg_order_weight_kg, 0), 1) as orders_per_car_by_weight
FROM warehouse_stats
ORDER BY warehouse_id
'''

ws_ticket_data = snowflake_query("Egypt", query)
ws_ticket_data.columns = ws_ticket_data.columns.str.lower()
for col in ws_ticket_data.columns:
    ws_ticket_data[col] = pd.to_numeric(ws_ticket_data[col], errors='ignore')

print("=== WAREHOUSE TICKET SIZE STATISTICS ===")
print(ws_ticket_data[['warehouse_name', 'avg_ticket_size', 'median_ticket_size', 'avg_order_weight_kg']].to_string(index=False))
print(f"\nOverall average ticket size: {ws_ticket_data['avg_ticket_size'].mean():.2f} EGP")


=== WAREHOUSE TICKET SIZE STATISTICS ===
warehouse_name  avg_ticket_size  median_ticket_size  avg_order_weight_kg
      Mostorod          6221.75             4650.00               139.59
         Tanta          4798.51             3507.63               120.62
       El-Marg          6221.75             4650.00               139.59
       Sharqya          5050.65             3845.75               117.40
      Barageel          6765.23             4905.50               137.76
     El-Mahala          4798.51             3507.63               120.62
   Mansoura FC          5050.65             3845.75               117.40
    Bani sweif          4956.83             3665.25               137.77
     Assiut FC          4956.83             3665.25               137.77
         Sohag          4956.83             3665.25               137.77
 Menya Samalot          4956.83             3665.25               137.77
 Khorshed Alex          5029.32             3497.58               113.70
      Sakk

In [53]:
# Merge ticket size data with final_data (including orders_per_car_by_weight)
final_data = final_data.merge(
    ws_ticket_data[['warehouse_id', 'avg_ticket_size', 'median_ticket_size', 'avg_order_weight_kg', 'orders_per_car_by_weight']], 
    on='warehouse_id', 
    how='left'
)

def calculate_ws_new_logic(row):
    """
    Calculate wholesale pricing based on delivery savings.
    
    Logic:
    - Car cost = 900 EGP, but car serves multiple orders per trip
    - Car cost per order = 900 / orders_per_car
    - If retailer consolidates, they save N orders worth of car cost
    - Savings = deliveries_saved * (car_cost / orders_per_car)
    - Calculate scenarios from 2x to max_multiplier (capped by max TS)
    
    Returns: Dict with optimal scenario
    """
    
    # Get values
    current_price = row['packing_unit_price']
    wac = row['wac_p']
    avg_ts = row.get('avg_ticket_size', 4000)  # Default 4000 if missing
    tier_2_price = row['tier_2_price']
    
    # Get orders per car (how many orders fit in one car trip based on weight)
    orders_per_car = row.get('orders_per_car_by_weight', 10)  # Default 10 if missing
    if pd.isna(orders_per_car) or orders_per_car <= 0:
        orders_per_car = 10
    
    # Calculate car cost per order
    car_cost_per_order = WS_CAR_COST / orders_per_car
    
    if pd.isna(avg_ts) or avg_ts <= 0:
        avg_ts = 4000
    
    if pd.isna(current_price) or pd.isna(wac) or current_price <= 0 or wac <= 0 or pd.isna(tier_2_price):
        return pd.Series({
            'ws_new_multiplier': None,
            'ws_new_order_value': None,
            'ws_new_qty': None,
            'ws_new_deliveries_saved': None,
            'ws_new_car_cost_per_order': None,
            'ws_new_total_savings': None,
            'ws_new_discount_per_unit': None,
            'ws_new_price': None,
            'ws_new_margin': None,
            'ws_new_savings_pct': None
        })
    
    # Calculate max multiplier based on constraints
    # Max by ticket size: WS_MAX_TICKET_SIZE / avg_ts
    # No arbitrary cap - let WS_MAX_TICKET_SIZE (50K) be the only limit
    max_multiplier = int(WS_MAX_TICKET_SIZE / avg_ts)
    
    best_scenario = None
    best_savings_pct = 0
    
    # Test scenarios from 2x to max_multiplier
    for multiplier in range(2, max_multiplier + 1):
        # Order value at this multiplier
        order_value = avg_ts * multiplier
        
        # Deliveries saved = multiplier - 1 (consolidating multiple orders into one)
        deliveries_saved = multiplier - 1
        
        # Total savings = deliveries_saved * car_cost_per_order
        # This is the actual cost saving from consolidating orders
        total_savings = deliveries_saved * car_cost_per_order
        
        # How many units of this SKU fit in this order value?
        qty_at_current_price = order_value / current_price
        
        if qty_at_current_price <= 0:
            continue
        
        # Discount per unit from car cost savings
        discount_per_unit = total_savings / qty_at_current_price
        
        # New price after passing car cost savings
        new_price = tier_2_price - discount_per_unit
        
        # Check if price stays above WAC with minimum margin
        min_acceptable_price = wac * (1 + WS_MIN_MARGIN)
        
        if new_price >= min_acceptable_price:
            # Calculate margin at new price
            margin = (new_price - wac) / new_price
            
            # Savings percentage for retailer
            savings_pct = (discount_per_unit / current_price) * 100
            
            # Keep track of best scenario (highest savings while valid)
            if savings_pct > best_savings_pct:
                best_savings_pct = savings_pct
                best_scenario = {
                    'ws_new_multiplier': multiplier,
                    'ws_new_order_value': round(order_value, 2),
                    'ws_new_qty': round(qty_at_current_price, 0),
                    'ws_new_deliveries_saved': deliveries_saved,
                    'ws_new_car_cost_per_order': round(car_cost_per_order, 2),
                    'ws_new_total_savings': round(total_savings, 2),
                    'ws_new_discount_per_unit': round(discount_per_unit, 2),
                    'ws_new_price': round(new_price, 2),
                    'ws_new_margin': round(margin, 4),
                    'ws_new_savings_pct': round(savings_pct, 2)
                }
    
    if best_scenario:
        return pd.Series(best_scenario)
    else:
        return pd.Series({
            'ws_new_multiplier': None,
            'ws_new_order_value': None,
            'ws_new_qty': None,
            'ws_new_deliveries_saved': None,
            'ws_new_car_cost_per_order': None,
            'ws_new_total_savings': None,
            'ws_new_discount_per_unit': None,
            'ws_new_price': None,
            'ws_new_margin': None,
            'ws_new_savings_pct': None
        })

# Apply the new wholesale logic
print("Calculating new wholesale logic based on delivery savings...")
ws_new_results = final_data.apply(calculate_ws_new_logic, axis=1)
final_data = pd.concat([final_data, ws_new_results], axis=1)

# Summary statistics
valid_ws_new = final_data['ws_new_price'].notna().sum()
print(f"\n=== NEW WHOLESALE LOGIC SUMMARY ===")
print(f"SKUs with valid WS new price: {valid_ws_new} / {len(final_data)}")
print(f"Total car cost: {WS_CAR_COST} EGP")
print(f"Average orders per car: {final_data['orders_per_car_by_weight'].mean():.1f}")
print(f"Average car cost per order: {WS_CAR_COST / final_data['orders_per_car_by_weight'].mean():.2f} EGP")

if valid_ws_new > 0:
    print(f"\nOrder Consolidation:")
    print(f"  Average multiplier: {final_data['ws_new_multiplier'].mean():.1f}x of avg ticket size")
    print(f"  Average order value needed: {final_data['ws_new_order_value'].mean():.2f} EGP")
    print(f"  Average deliveries saved: {final_data['ws_new_deliveries_saved'].mean():.1f}")
    
    print(f"\nCar Cost Savings:")
    print(f"  Average car cost per order: {final_data['ws_new_car_cost_per_order'].mean():.2f} EGP")
    print(f"  Average total savings: {final_data['ws_new_total_savings'].mean():.2f} EGP")
    print(f"  Average discount per unit: {final_data['ws_new_discount_per_unit'].mean():.2f} EGP")
    
    print(f"\nPricing:")
    print(f"  Average WS new price margin: {final_data['ws_new_margin'].mean()*100:.2f}%")
    print(f"  Average retailer savings: {final_data['ws_new_savings_pct'].mean():.2f}%")
    
    # Distribution of multipliers
    print(f"\nMultiplier distribution:")
    print(final_data['ws_new_multiplier'].value_counts().sort_index())


Calculating new wholesale logic based on delivery savings...

=== NEW WHOLESALE LOGIC SUMMARY ===
SKUs with valid WS new price: 1933 / 1963
Total car cost: 1100 EGP
Average orders per car: 14.0
Average car cost per order: 78.66 EGP

Order Consolidation:
  Average multiplier: 6.9x of avg ticket size
  Average order value needed: 36086.50 EGP
  Average deliveries saved: 5.9

Car Cost Savings:
  Average car cost per order: 79.13 EGP
  Average total savings: 463.62 EGP
  Average discount per unit: 3.01 EGP

Pricing:
  Average WS new price margin: 3.07%
  Average retailer savings: 1.27%

Multiplier distribution:
ws_new_multiplier
2.0     32
3.0     27
4.0     28
5.0    314
6.0    154
7.0    486
8.0    892
Name: count, dtype: int64


In [None]:
# =============================================================================
# FINAL RANKING FILTER
# =============================================================================

# Re-rank within each warehouse and filter to top products
final_data['new_rank'] = final_data.groupby(['warehouse_id'])['final_rank'].rank(method='dense', ascending=True)
final_data = final_data[final_data['new_rank'] <= FINAL_PRODUCTS_PER_WAREHOUSE]

print(f"✓ Filtered to top {FINAL_PRODUCTS_PER_WAREHOUSE} products per warehouse: {len(final_data)} total SKUs")

In [55]:
# =============================================================================
# CALCULATE ADDITIONAL METRICS
# =============================================================================

# --- Stretch Percentages (how much retailers need to increase to reach each tier) ---
# Already included from tiers_selection: tier_1_increase_pct, tier_2_increase_pct
# These show: (tier_qty - median_qty) / median_qty * 100

# Rename for clarity
final_data['stretch_to_tier_1_pct'] = final_data['tier_1_increase_pct']
final_data['stretch_to_tier_2_pct'] = final_data['tier_2_increase_pct']

# --- Margins for each price tier ---
# Margin = (price - wac) / price
final_data['tier_1_margin'] = ((final_data['tier_1_price'] - final_data['wac_p']) / final_data['tier_1_price']).round(4)
final_data['tier_2_margin'] = ((final_data['tier_2_price'] - final_data['wac_p']) / final_data['tier_2_price']).round(4)
final_data['WS_margin'] = ((final_data['WS_price'] - final_data['wac_p']) / final_data['wac_p']).round(4)
final_data['current_margin'] = ((final_data['packing_unit_price'] - final_data['wac_p']) / final_data['packing_unit_price']).round(4)

# --- Discount calculations ---
# Absolute discounts (price reduction from current price)
final_data['discount_1'] = (final_data['packing_unit_price'] - final_data['tier_1_price']).round(2)
final_data['discount_2'] = (final_data['packing_unit_price'] - final_data['tier_2_price']).round(2)

# Discount percentages
final_data['discount_1_pct'] = ((final_data['discount_1'] / final_data['packing_unit_price']) * 100).round(2)
final_data['discount_2_pct'] = ((final_data['discount_2'] / final_data['packing_unit_price']) * 100).round(2)

# --- Quantity and Discount Ratios ---
# Quantity ratio (tier_2_qty / tier_1_qty)
final_data['qty_ratio'] = (final_data['tier_2_qty'] / final_data['tier_1_qty']).round(2)

# Discount ratio (discount_2 / discount_1)
final_data['discount_ratio'] = (final_data['discount_2'] / final_data['discount_1']).round(2)

# Elasticity ratio = discount_ratio / qty_ratio
# This shows how much extra discount per unit of quantity increase
final_data['elasticity_ratio'] = (final_data['discount_ratio'] / final_data['qty_ratio']).round(2)

print("=== METRICS SUMMARY ===")
print(f"\nStretch Analysis (how much retailers need to increase orders):")
print(f"  Average stretch to Tier 1: {final_data['stretch_to_tier_1_pct'].mean():.1f}%")
print(f"  Average stretch to Tier 2: {final_data['stretch_to_tier_2_pct'].mean():.1f}%")

print(f"\nMargin Analysis:")
print(f"  Current margin:  {final_data['current_margin'].mean()*100:.2f}%")
print(f"  Tier 1 margin:   {final_data['tier_1_margin'].mean()*100:.2f}%")
print(f"  Tier 2 margin:   {final_data['tier_2_margin'].mean()*100:.2f}%")
print(f"  WS margin:       {final_data['WS_margin'].mean()*100:.2f}%")

print(f"\nDiscount Analysis:")
print(f"  Average Tier 1 discount: {final_data['discount_1_pct'].mean():.2f}%")
print(f"  Average Tier 2 discount: {final_data['discount_2_pct'].mean():.2f}%")

print(f"\nElasticity Analysis (discount increase vs quantity increase):")
print(f"  Average qty ratio (T2/T1): {final_data['qty_ratio'].mean():.2f}x")
print(f"  Average discount ratio (D2/D1): {final_data['discount_ratio'].mean():.2f}x")
print(f"  Average elasticity ratio: {final_data['elasticity_ratio'].mean():.2f}")


=== METRICS SUMMARY ===

Stretch Analysis (how much retailers need to increase orders):
  Average stretch to Tier 1: 253.3%
  Average stretch to Tier 2: 514.9%

Margin Analysis:
  Current margin:  5.83%
  Tier 1 margin:   5.11%
  Tier 2 margin:   4.25%
  WS margin:       3.17%

Discount Analysis:
  Average Tier 1 discount: 0.76%
  Average Tier 2 discount: 1.65%

Elasticity Analysis (discount increase vs quantity increase):
  Average qty ratio (T2/T1): 1.74x
  Average discount ratio (D2/D1): 2.20x
  Average elasticity ratio: 1.27


## 7. Final Ranking & Export


In [67]:
# =============================================================================
# CREATE UPLOAD FORMAT
# =============================================================================
# Format: ONE row per warehouse_id
# - Discounts Group 1: List of [tier 1 items + wholesale items] (max 200, overflow goes to Group 2)
# - Discounts Group 2: List of [tier 2 items + overflow from Group 1]
# Each item format: [product_id, packing_unit_id, quantity, discount_pct]

MAX_GROUP_SIZE = 200
MAX_DISCOUNT_CAP = 6.0  # Maximum discount capped at 6%

final_quantity_discount = pd.DataFrame(columns=['warehouse_id', 'Discounts Group 1', 'Discounts Group 2', 'Description'])

for wh_id in final_data.warehouse_id.unique():
    warehouse_data = final_data[final_data['warehouse_id'] == wh_id]
    warehouse_id = int(wh_id)
    
    # Collect all tier 1 items
    tier_1_items = []
    # Collect all tier 2 items
    tier_2_items = []
    # Collect all wholesale items
    ws_items = []
    
    for i, r in warehouse_data.iterrows():
        product_id = int(r['product_id'])
        packing_unit_id = int(r['packing_unit_id'])
        current_price = r['packing_unit_price']
        
        # Tier 1 (cap discount at MAX_DISCOUNT_CAP)
        q_1 = int(r['tier_1_qty'])
        d_1 = min(round(r['discount_1_pct'], 2), MAX_DISCOUNT_CAP)
        tier_1_items.append([product_id, packing_unit_id, q_1, d_1])
        
        # Tier 2 (cap discount at MAX_DISCOUNT_CAP)
        q_2 = int(r['tier_2_qty'])
        d_2 = min(round(r['discount_2_pct'], 2), MAX_DISCOUNT_CAP)
        tier_2_items.append([product_id, packing_unit_id, q_2, d_2])
        
        # Wholesale (new logic) - cap discount at MAX_DISCOUNT_CAP
        ws_qty = r.get('ws_new_qty', None)
        ws_price = r.get('ws_new_price', None)
        
        if pd.notna(ws_qty) and pd.notna(ws_price) and ws_qty > 0 and current_price > 0:
            q_ws = int(ws_qty)
            d_ws = min(round(((current_price - ws_price) / current_price) * 100, 2), MAX_DISCOUNT_CAP)
            ws_items.append([product_id, packing_unit_id, q_ws, d_ws])
    
    # Group 1: Tier 1 + Wholesale (max 200)
    group_1_items = tier_1_items + ws_items
    
    # Group 2: Tier 2 + overflow from Group 1
    if len(group_1_items) > MAX_GROUP_SIZE:
        # Overflow goes to Group 2
        overflow = group_1_items[MAX_GROUP_SIZE:]
        group_1_items = group_1_items[:MAX_GROUP_SIZE]
        group_2_items = tier_2_items + overflow
    else:
        group_2_items = tier_2_items
    
    new_row = {
        'warehouse_id': warehouse_id,
        'Discounts Group 1': group_1_items,
        'Discounts Group 2': group_2_items,
        'Description': f'{warehouse_id}QD'
    }
    final_quantity_discount = pd.concat([final_quantity_discount, pd.DataFrame([new_row])], ignore_index=True)

# Summary
print(f"Upload format created: {len(final_quantity_discount)} warehouse rows")
print(f"\nPer warehouse breakdown:")
for idx, row in final_quantity_discount.iterrows():
    wh = row['warehouse_id']
    g1_count = len(row['Discounts Group 1'])
    g2_count = len(row['Discounts Group 2'])
    print(f"  WH {wh}: Group 1 = {g1_count} items, Group 2 = {g2_count} items")

# =============================================================================
# SAVE FILES
# =============================================================================

# Save detailed data
detailed_file = 'QD_detailed.xlsx'
final_data.to_excel(detailed_file, index=False)
print(f"\n=== DETAILED FILE ===")
print(f"Saved {len(final_data)} SKUs to '{detailed_file}'")

# Save upload format
upload_file = 'QD_upload.xlsx'
final_quantity_discount.to_excel(upload_file, index=False)
print(f"\n=== UPLOAD FILE ===")
print(f"Saved {len(final_quantity_discount)} rows to '{upload_file}'")
print(f"Columns: {list(final_quantity_discount.columns)}")

Upload format created: 12 warehouse rows

Per warehouse breakdown:
  WH 501: Group 1 = 200 items, Group 2 = 196 items
  WH 401: Group 1 = 200 items, Group 2 = 196 items
  WH 236: Group 1 = 200 items, Group 2 = 199 items
  WH 337: Group 1 = 200 items, Group 2 = 196 items
  WH 797: Group 1 = 200 items, Group 2 = 198 items
  WH 339: Group 1 = 200 items, Group 2 = 197 items
  WH 703: Group 1 = 200 items, Group 2 = 197 items
  WH 1: Group 1 = 200 items, Group 2 = 199 items
  WH 962: Group 1 = 200 items, Group 2 = 199 items
  WH 170: Group 1 = 200 items, Group 2 = 197 items
  WH 632: Group 1 = 200 items, Group 2 = 196 items
  WH 8: Group 1 = 200 items, Group 2 = 197 items

=== DETAILED FILE ===
Saved 1596 SKUs to 'QD_detailed.xlsx'

=== UPLOAD FILE ===
Saved 12 rows to 'QD_upload.xlsx'
Columns: ['warehouse_id', 'Discounts Group 1', 'Discounts Group 2', 'Description']


In [None]:
# Warehouse to Tag ID mapping for upload
df_warehouse_mapping = pd.DataFrame({
    'warehouse_name': ['Assiut FC', 'Bani sweif', 'Barageel', 'El-Mahala', 'Khorshed Alex', 
                       'Mansoura FC', 'Menya Samalot', 'Mostorod', 'Sakkarah', 'Sharqya', 
                       'Sohag', 'Tanta'],
    'warehouse_id':   [501, 401, 236, 337, 797, 339, 703, 1, 962, 170, 632, 8],
    'tag_id':         [3301, 3302, 3303, 3304, 3305, 3306, 3307, 3308, 3309, 3310, 3311, 3312]
})


In [None]:
# Merge upload data with warehouse mapping
to_upload = final_quantity_discount.merge(df_warehouse_mapping, on='warehouse_id')

In [None]:
# =============================================================================
# PREPARE FINAL UPLOAD FILE
# =============================================================================

# Set description and date/time fields
to_upload['Description'] = to_upload['warehouse_name'].astype(str) + "_QD"

start_date = datetime.now() + timedelta(minutes=10)
start_date_str = start_date.strftime('%d/%m/%Y %H:%M')

end_date = datetime.now() + timedelta(days=3)
end_date = end_date.replace(hour=23, minute=59, second=0, microsecond=0)
end_date_str = end_date.strftime('%d/%m/%Y %H:%M')

to_upload['Start Date/Time'] = start_date_str
to_upload['End Date/Time'] = end_date_str
to_upload = to_upload.rename(columns={'tag_id': 'Tag ID'})

# Aggregate by Tag ID
to_upload = to_upload.groupby(
    ['Tag ID', 'Description', 'Start Date/Time', 'End Date/Time'], 
    as_index=False
).agg({
    'Discounts Group 1': list,
    'Discounts Group 2': list
})

# Save upload file
to_upload.to_excel('QD_upload.xlsx', index=False)
print(f"✓ Saved upload file: QD_upload.xlsx ({len(to_upload)} warehouses)")

Unnamed: 0,Tag ID,Description,Start Date/Time,End Date/Time,Discounts Group 1,Discounts Group 2
0,3301,Assiut FC_QD,03/12/2025 15:24,06/12/2025 23:59,"[[[2778, 2, 5, 0.57], [6935, 2, 7, 1.44], [693...","[[[2778, 2, 11, 1.38], [6935, 2, 14, 3.61], [6..."
1,3302,Bani sweif_QD,03/12/2025 15:24,06/12/2025 23:59,"[[[141, 2, 4, 0.68], [2328, 2, 5, 0.39], [8650...","[[[141, 2, 7, 1.31], [2328, 2, 8, 1.23], [8650..."
2,3303,Barageel_QD,03/12/2025 15:24,06/12/2025 23:59,"[[[6935, 2, 8, 0.97], [362, 2, 4, 0.55], [141,...","[[[6935, 2, 16, 2.58], [362, 2, 7, 1.06], [141..."
3,3304,El-Mahala_QD,03/12/2025 15:24,06/12/2025 23:59,"[[[88, 10, 8, 0.93], [8915, 2, 5, 0.6], [6935,...","[[[88, 10, 16, 2.06], [8915, 2, 8, 1.05], [693..."
4,3305,Khorshed Alex_QD,03/12/2025 15:24,06/12/2025 23:59,"[[[6935, 2, 7, 0.64], [141, 2, 4, 0.55], [2778...","[[[6935, 2, 18, 2.84], [141, 2, 7, 1.06], [277..."
5,3306,Mansoura FC_QD,03/12/2025 15:24,06/12/2025 23:59,"[[[6935, 2, 6, 1.87], [305, 2, 4, 1.74], [2778...","[[[6935, 2, 10, 3.91], [305, 2, 7, 3.34], [277..."
6,3307,Menya Samalot_QD,03/12/2025 15:24,06/12/2025 23:59,"[[[2328, 2, 5, 0.39], [6935, 2, 7, 1.44], [141...","[[[2328, 2, 8, 1.23], [6935, 2, 14, 3.61], [14..."
7,3308,Mostorod_QD,03/12/2025 15:24,06/12/2025 23:59,"[[[3, 2, 4, 0.47], [8672, 2, 6, 0.76], [2049, ...","[[[3, 2, 7, 0.91], [8672, 2, 10, 1.4], [2049, ..."
8,3309,Sakkarah_QD,03/12/2025 15:24,06/12/2025 23:59,"[[[305, 2, 4, 1.25], [6935, 2, 8, 0.97], [326,...","[[[305, 2, 7, 2.4], [6935, 2, 16, 2.74], [326,..."
9,3310,Sharqya_QD,03/12/2025 15:24,06/12/2025 23:59,"[[[11685, 2, 5, 1.6], [130, 1, 4, 1.36], [6935...","[[[11685, 2, 8, 3.15], [130, 1, 7, 2.63], [693..."


In [None]:
# =============================================================================
# UPLOAD TO API
# =============================================================================

print("Uploading QD file to API...")
response = post_QD('QD_upload.xlsx')

if response.ok:
    print(f"✓ Upload succeeded (status: {response.status_code})")
else:
    print(f"❌ Upload failed (status: {response.status_code})")
    print(response.content)

In [None]:
# =============================================================================
# PREPARE CART RULES UPDATE
# =============================================================================

# Merge current cart rules with new tier data
cart_rules_update = live_cart_rules.merge(
    final_data[['warehouse_id', 'product_id', 'packing_unit_id', 'tier_2_qty', 'ws_new_qty']],
    on=['warehouse_id', 'product_id', 'packing_unit_id']
)
cart_rules_update = cart_rules_update.fillna(0)

# New cart rule = max of tier_2_qty and ws_new_qty
cart_rules_update['tier_2'] = np.maximum(cart_rules_update['tier_2_qty'], cart_rules_update['ws_new_qty'])

# Only update rules that need to increase
cart_rules_update = cart_rules_update[cart_rules_update['tier_2'] > cart_rules_update['current_cart_rule']]
cart_rules_update = cart_rules_update[['cohort_id', 'product_id', 'packing_unit_id', 'tier_2']]

print(f"✓ Cart rules to update: {len(cart_rules_update)} products across {cart_rules_update['cohort_id'].nunique()} cohorts")

Unnamed: 0,cohort_id,product_id,packing_unit_id,tier_2
0,702,11491,2,170.0
1,702,6936,2,334.0
2,702,8650,2,272.0
3,702,2438,2,753.0
4,702,8853,2,334.0
...,...,...,...,...
1591,1123,8915,2,260.0
1592,1124,416,1,118.0
1593,1123,1413,15,134.0
1594,1124,2703,3,439.0


In [None]:
# =============================================================================
# UPLOAD CART RULES BY COHORT
# =============================================================================

print("Uploading cart rules by cohort...")

for cohort in cart_rules_update.cohort_id.unique():
    req_data = cart_rules_update[cart_rules_update['cohort_id'] == cohort]
    
    if len(req_data) > 0:
        # Prepare data for upload
        req_data = req_data[['product_id', 'packing_unit_id', 'tier_2']]
        req_data.columns = ['Product ID', 'Packing Unit ID', 'Cart Rules']
        
        # Save and upload
        filename = f'CartRules_{cohort}.xlsx'
        req_data.to_excel(filename, index=False, engine='xlsxwriter')
        
        time.sleep(5)
        response = post_cart_rules(cohort, filename)
        
        if response.ok:
            print(f"  ✓ Cohort {cohort}: {len(req_data)} rules uploaded")
        else:
            print(f"  ❌ Cohort {cohort}: Upload failed")
            print(response.content)
            break

print("\n✓ Cart rules upload complete!")