# Quantity Discount (QD) Handler V2

Handles calculation, creation, activation, and deactivation of Quantity Discounts with improved logic.

## Key Changes from V1
1. **Priority-based selection**: High DOH SKUs (DOH > 30 days and inventory value > 10K EGP) are ranked first, then all others, up to a total limit of 400 tier entries per warehouse
2. **Sequential performance adjustment**: Lower the quantity first (at most 2 times), then boost the discount (at most 3 times)
3. **Tier-specific thresholds**: LOW (T1:3%, T2:7%, T3:5%), HIGH (LOW+10%)
4. **Margin-based fallbacks**: For SKUs without market/margin data
5. **Elasticity constraint**: qty_ratio < discount_ratio (T1/T2 only, NOT T3)
6. **More accessible wholesale**: 2x minimum multiplier (was 3x)

## Usage
```python
%run qd_handler_v2.ipynb

# Process QD with DataFrame from Module 3
result = process_qd_v2(df_qd, dry_run=True)
```

## Input Requirements (DataFrame columns from Module 3)
**Identifiers:**
- `product_id`, `warehouse_id`, `cohort_id`, `sku`, `brand`, `cat`

**Pricing Data:**
- `wac_p`, `current_price`, `new_price`, `target_margin`, `min_boundary`

**Market Margins:**
- `below_market`, `market_min`, `market_25`, `market_50`, `market_75`, `market_max`, `above_market`

**Margin Tiers:**
- `margin_tier_1` through `margin_tier_5`, `margin_tier_above_1`, `margin_tier_above_2`

**Performance Data (NEW):**
- `responsive_doh`, `stocks`, `doh` - for DOH prioritization
- `t1_cntrb_uth`, `t2_cntrb_uth`, `t3_cntrb_uth` - tier contribution
- `has_active_qd` - existing QD flag
- `mtd_qty` - for ranking

**QD Configuration:**
- `keep_qd_tiers` - List of tiers to keep, e.g., `['T1', 'T2']`


In [None]:
# =============================================================================
# IMPORTS & CONFIGURATION
# =============================================================================
import pandas as pd
import numpy as np
import requests
from datetime import datetime, timedelta
import pytz
import os
import ast
import json
import time
import base64
import boto3
from botocore.exceptions import ClientError
import snowflake.connector
import sys

%run queries_module.ipynb
# Add parent directory for imports
sys.path.insert(0, '..')
import setup_environment_2
from common_functions import send_file_slack

# Initialize environment variables (loads Snowflake credentials)
setup_environment_2.initialize_env()

# Cairo Timezone
# NOTE: CAIRO_NOW and TODAY are captured once, when this cell runs. They are not
# refreshed afterwards, so re-run the cell if the kernel stays alive past midnight.
CAIRO_TZ = pytz.timezone('Africa/Cairo')
CAIRO_NOW = datetime.now(CAIRO_TZ)
TODAY = CAIRO_NOW.date()

# =============================================================================
# SNOWFLAKE CONNECTION
# =============================================================================
def query_snowflake(query):
    """Execute a query on Snowflake and return the results as a DataFrame.

    A new connection is opened on every call using the SNOWFLAKE_USERNAME,
    SNOWFLAKE_ACCOUNT, SNOWFLAKE_PASSWORD and SNOWFLAKE_DATABASE environment
    variables. The session is switched to the COMPUTE_WH warehouse before the
    query is executed.

    Args:
        query: SQL text to execute.

    Returns:
        DataFrame with every fetched row; column names are lower-cased.

    Raises:
        KeyError: If one of the required environment variables is not set.
    """
    con = snowflake.connector.connect(
        user=os.environ["SNOWFLAKE_USERNAME"],
        account=os.environ["SNOWFLAKE_ACCOUNT"],
        password=os.environ["SNOWFLAKE_PASSWORD"],
        database=os.environ["SNOWFLAKE_DATABASE"]
    )
    try:
        cur = con.cursor()
        try:
            cur.execute("USE WAREHOUSE COMPUTE_WH")
            cur.execute(query)
            data = cur.fetchall()
            # Read the column metadata before the cursor is released.
            columns = [desc[0].lower() for desc in cur.description]
        finally:
            # Release the cursor explicitly instead of relying on con.close().
            cur.close()
        return pd.DataFrame(data, columns=columns)
    finally:
        con.close()

def get_snowflake_timezone():
    """Return the TIMEZONE parameter reported by Snowflake.

    Falls back to "UTC" when the SHOW PARAMETERS query yields no rows.
    """
    params = query_snowflake("SHOW PARAMETERS LIKE 'TIMEZONE'")
    if len(params) == 0:
        return "UTC"
    return params['value'].iloc[0]

# Snowflake TIMEZONE parameter, looked up once when this cell runs
# (this issues a Snowflake query at load time).
TIMEZONE = get_snowflake_timezone()

# =============================================================================
# AWS & API FUNCTIONS
# =============================================================================
def get_secret(secret_name: str) -> str:
    """Fetch a secret value from AWS Secrets Manager (region us-east-1).

    Returns the ``SecretString`` field when the response has one; otherwise
    the base64-decoded ``SecretBinary`` payload (a ``bytes`` object, despite
    the ``str`` annotation). AWS client errors are printed and re-raised.
    """
    secrets_client = boto3.session.Session().client(
        service_name='secretsmanager',
        region_name="us-east-1",
    )

    try:
        secret_value = secrets_client.get_secret_value(SecretId=secret_name)
    except ClientError as err:
        print(f"AWS Error: {err}")
        raise err

    if 'SecretString' not in secret_value:
        return base64.b64decode(secret_value['SecretBinary'])
    return secret_value['SecretString']


def get_access_token(url: str, client_id: str, client_secret: str) -> str:
    """Get OAuth2 access token for MaxAB API authentication.

    Posts a password-grant request to the token endpoint using the
    module-level API_USERNAME / API_PASSWORD credentials.

    Args:
        url: Token endpoint URL.
        client_id: OAuth2 client id.
        client_secret: OAuth2 client secret.

    Returns:
        The ``access_token`` field of the JSON response.

    Raises:
        requests.exceptions.Timeout: If the endpoint does not answer in time.
        requests.exceptions.HTTPError: If the endpoint returns a 4xx/5xx status.
        KeyError: If a successful response carries no ``access_token``.
    """
    response = requests.post(
        url,
        data={
            'grant_type': 'password',
            'client_id': client_id,
            'client_secret': client_secret,
            'username': API_USERNAME,
            'password': API_PASSWORD
        },
        # Without a timeout requests waits forever on an unresponsive server.
        timeout=30,
    )
    # Surface auth/server failures as an HTTP error rather than an opaque
    # KeyError on the missing 'access_token' field.
    response.raise_for_status()
    return response.json()['access_token']


def _get_api_token() -> str:
    """Request a new MaxAB API token from the SSO token endpoint.

    Uses the fixed 'main-system-externals' client together with the
    module-level API_SECRET.
    """
    token_url = 'https://sso.maxab.info/auth/realms/maxab/protocol/openid-connect/token'
    client_id = 'main-system-externals'
    return get_access_token(token_url, client_id, API_SECRET)

# =============================================================================
# API CREDENTIALS INITIALIZATION
# =============================================================================
# The secret payload is parsed as JSON; the three egypt_* keys below are read from it.
# get_access_token() and _get_api_token() look these module-level names up at call
# time, so they only have to exist before the first token request is made.
pricing_api_secret = json.loads(get_secret("prod/pricing/api/"))
API_USERNAME = pricing_api_secret["egypt_username"]
API_PASSWORD = pricing_api_secret["egypt_password"]
API_SECRET = pricing_api_secret["egypt_secret"]

# =============================================================================
# API CONFIGURATION
# =============================================================================
# Base URL of the quantity-discounts admin endpoint.
QD_API_URL = 'https://api.maxab.info/commerce/api/admins/v1/quantity-discounts/'

# Startup banner, including the timezone reported by Snowflake.
print("✓ QD Handler V2 initialized")
print(f"  Timezone: {TIMEZONE}")


In [None]:
# =============================================================================
# V2 CONFIGURATION CONSTANTS
# =============================================================================

# -----------------------------------------------------------------------------
# CONVERSION THRESHOLDS (tier-specific)
# -----------------------------------------------------------------------------
# LOW thresholds - below this, take action to improve conversion
T1_LOW_CONVERSION_THRESHOLD = 3   # %
T2_LOW_CONVERSION_THRESHOLD = 7   # %
T3_LOW_CONVERSION_THRESHOLD = 5   # %

# HIGH thresholds (LOW + 10%) - above this, reduce discount to save margin
# NOTE: these are hard-coded literals, not derived from the LOW values above;
# update both sets together if the LOW thresholds change.
T1_HIGH_CONVERSION_THRESHOLD = 13  # % (3 + 10)
T2_HIGH_CONVERSION_THRESHOLD = 17  # % (7 + 10)
T3_HIGH_CONVERSION_THRESHOLD = 15  # % (5 + 10)

# -----------------------------------------------------------------------------
# QUANTITY ADJUSTMENT (applied FIRST when conversion is low)
# -----------------------------------------------------------------------------
QTY_REDUCTION_PCT = 0.10          # Reduce qty by 10% each attempt
QTY_REDUCTION_MIN = 1             # Minimum reduction of 1 unit
MAX_QTY_LOWER_COUNT = 2           # Max 2 times to lower quantity

# -----------------------------------------------------------------------------
# DISCOUNT ADJUSTMENT (applied AFTER qty adjustments exhausted)
# -----------------------------------------------------------------------------
ZERO_CONVERSION_MULTIPLIER = 1.25  # Boost discount by 25% when 0% contribution
LOW_CONVERSION_MULTIPLIER = 1.15   # Boost discount by 15% when below threshold
HIGH_CONVERSION_MULTIPLIER = 0.8   # Reduce discount by 20% when above HIGH
MAX_DISC_BOOST_COUNT = 3           # Max 3 times to boost discount

# -----------------------------------------------------------------------------
# MARGIN-BASED FALLBACK (when no market/margin tier prices available)
# -----------------------------------------------------------------------------
# Percentage of current margin to KEEP (give up the rest as discount)
FALLBACK_T1_MARGIN_KEEP = 0.90   # Keep 90% margin, give up 10%
FALLBACK_T2_MARGIN_KEEP = 0.75   # Keep 75% margin, give up 25%
FALLBACK_T3_MARGIN_KEEP = 0.60   # Keep 60% margin, give up 40%

# -----------------------------------------------------------------------------
# DISCOUNT BOUNDS
# -----------------------------------------------------------------------------
# Both values are percentages (5.0 means 5%); they are divided by 100 where used.
MAX_DISCOUNT_PCT = 5.0    # Maximum allowed discount from current price
MIN_DISCOUNT_PCT = 0.35   # Minimum required discount from current price

# Elasticity ratio constraints (T1/T2 ONLY - T3 exempt)
MIN_RATIO = 1.1  # Minimum elasticity ratio
MAX_RATIO = 3.0  # Maximum elasticity ratio

# Minimum gap between tier prices
# NOTE(review): presumably a percentage like the discount bounds above - confirm.
MIN_GAP_PCT = 0.25

# -----------------------------------------------------------------------------
# WHOLESALE (TIER 3) CONFIGURATION
# -----------------------------------------------------------------------------
WS_CAR_COST = 1400           # Cost per delivery (EGP)
WS_CAR_CAPACITY_TONS = 1.8   # Max car capacity in tons
WS_MAX_TICKET_SIZE = 35000   # Maximum ticket size (EGP)
# NOTE(review): the value is negative, i.e. a margin of -2%; the original comment
# said "above WAC", which contradicts the sign - confirm the intended floor.
WS_MIN_MARGIN = -0.02        # Minimum margin (-2%)
WS_MIN_MULTIPLIER = 2        # Minimum multiplier (was 3 in V1)

# -----------------------------------------------------------------------------
# HIGH DOH PRIORITIZATION
# -----------------------------------------------------------------------------
HIGH_DOH_THRESHOLD = 30              # Days on hand threshold
HIGH_DOH_INVENTORY_THRESHOLD = 10000 # EGP - minimum inventory value for priority

# -----------------------------------------------------------------------------
# SELECTION LIMITS
# -----------------------------------------------------------------------------
TOP_TIERS_PER_WAREHOUSE = 400  # Total tier entries per warehouse (not SKUs)

# -----------------------------------------------------------------------------
# UPLOAD FORMAT CONFIGURATION
# -----------------------------------------------------------------------------
MAX_GROUP_SIZE = 200         # Max items per discount group in API
MAX_DISCOUNT_CAP_T1 = 4.0    # Maximum discount cap for Tier 1
MAX_DISCOUNT_CAP_T2 = 5.0    # Maximum discount cap for Tier 2
MAX_DISCOUNT_CAP_WS = 6.0    # Maximum discount cap for Wholesale

# QD Duration
QD_DURATION_HOURS = 14      # QD valid for 14 hours

# -----------------------------------------------------------------------------
# OUTPUT DIRECTORY CONFIGURATION
# -----------------------------------------------------------------------------
QD_OUTPUT_DIR = 'qd_uploads_v2'  # Dedicated directory for QD handler V2 output
# Side effect at load time: the directory is created relative to the current
# working directory if it does not exist yet.
os.makedirs(QD_OUTPUT_DIR, exist_ok=True)

# -----------------------------------------------------------------------------
# WAREHOUSE TO TAG ID MAPPING
# -----------------------------------------------------------------------------
# Keyed by warehouse_id; each entry holds the warehouse display name and its tag_id.
WAREHOUSE_TAG_MAPPING = {
    501: {'name': 'Assiut FC', 'tag_id': 3301},
    401: {'name': 'Bani sweif', 'tag_id': 3302},
    236: {'name': 'Barageel', 'tag_id': 3303},
    337: {'name': 'El-Mahala', 'tag_id': 3304},
    797: {'name': 'Khorshed Alex', 'tag_id': 3305},
    339: {'name': 'Mansoura FC', 'tag_id': 3306},
    703: {'name': 'Menya Samalot', 'tag_id': 3307},
    1: {'name': 'Mostorod', 'tag_id': 3308},
    962: {'name': 'Sakkarah', 'tag_id': 3309},
    170: {'name': 'Sharqya', 'tag_id': 3310},
    632: {'name': 'Sohag', 'tag_id': 3311},
    8: {'name': 'Tanta', 'tag_id': 3312},
    38: {'name': 'El-Marg', 'tag_id': 3313},
}

# Echo the effective configuration so each run's output records the values used.
print("✓ V2 Configuration loaded:")
print(f"  LOW thresholds: T1={T1_LOW_CONVERSION_THRESHOLD}%, T2={T2_LOW_CONVERSION_THRESHOLD}%, T3={T3_LOW_CONVERSION_THRESHOLD}%")
print(f"  HIGH thresholds: T1={T1_HIGH_CONVERSION_THRESHOLD}%, T2={T2_HIGH_CONVERSION_THRESHOLD}%, T3={T3_HIGH_CONVERSION_THRESHOLD}%")
print(f"  Qty reduction: {QTY_REDUCTION_PCT*100}% (max {MAX_QTY_LOWER_COUNT}x)")
print(f"  Disc boost: x{LOW_CONVERSION_MULTIPLIER}/x{ZERO_CONVERSION_MULTIPLIER} (max {MAX_DISC_BOOST_COUNT}x)")
print(f"  Wholesale min multiplier: {WS_MIN_MULTIPLIER}x")
print(f"  High DOH priority: DOH>{HIGH_DOH_THRESHOLD} AND inv>{HIGH_DOH_INVENTORY_THRESHOLD}")
print(f"  Selection limit: {TOP_TIERS_PER_WAREHOUSE} tiers/warehouse")


In [None]:
# =============================================================================
# DATA FETCHING: PACKING UNITS & TIER QUANTITIES
# =============================================================================

def get_top_selling_packing_units(product_warehouse_list: list) -> pd.DataFrame:
    """
    Get the top-selling packing unit per product per warehouse (last 90 days).

    Packing units are ranked by summed ``total_price`` of order lines created in
    the last 90 days (statuses 7 and 12 excluded, channels 'telesales' and
    'retailer' only). Only packing units whose ``deleted_at`` is NULL are
    considered, and the highest-ranked one per (product, warehouse) is returned.

    Args:
        product_warehouse_list: List of (product_id, warehouse_id) tuples

    Returns:
        DataFrame with product_id, warehouse_id, packing_unit_id, basic_unit_count.
        An empty frame with those columns is returned for empty input, without
        querying Snowflake.
    """
    if not product_warehouse_list:
        return pd.DataFrame(columns=['product_id', 'warehouse_id', 'packing_unit_id', 'basic_unit_count'])

    # int() casts guarantee that only numeric literals reach the SQL text.
    tuples_str = ','.join([f"({int(p)}, {int(w)})" for p, w in product_warehouse_list])

    query = f'''
    WITH input_products AS (
        SELECT product_id, warehouse_id
        FROM (VALUES {tuples_str}) AS x(product_id, warehouse_id)
    ),
    
    pack_products as(
        select product_id,packing_unit_id,basic_unit_count,max(deleted_at) as deleted_at
        from packing_unit_products pup 
        group by all
    ),
    
    sales_by_pu AS (
        SELECT 
            pso.product_id,
            so.warehouse_id,
            pso.packing_unit_id,
            SUM(pso.total_price) as nmv
        FROM product_sales_order pso
        JOIN sales_orders so ON so.id = pso.sales_order_id
        JOIN input_products ip ON ip.product_id = pso.product_id AND ip.warehouse_id = so.warehouse_id
        WHERE so.created_at >= CURRENT_DATE - 90
            AND so.sales_order_status_id NOT IN (7, 12)
            AND so.channel IN ('telesales', 'retailer')
            AND pso.purchased_item_count <> 0
        GROUP BY 1, 2, 3
    ),
    
    ranked_pu AS (
        SELECT 
            s.product_id, 
            s.warehouse_id, 
            s.packing_unit_id,
            pup.basic_unit_count,
            s.nmv,
            ROW_NUMBER() OVER (PARTITION BY s.product_id, s.warehouse_id ORDER BY s.nmv DESC) as rnk
        FROM sales_by_pu s
        JOIN pack_products pup 
            ON pup.product_id = s.product_id 
            AND pup.packing_unit_id = s.packing_unit_id
        WHERE pup.deleted_at IS NULL 
    )
    
    SELECT product_id, warehouse_id, packing_unit_id, basic_unit_count
    FROM ranked_pu
    WHERE rnk = 1
    '''

    print("  Fetching top-selling packing units (last 90 days)...")
    df = query_snowflake(query)

    # Best-effort numeric conversion: columns that cannot be parsed are left
    # untouched. This replaces errors='ignore', deprecated since pandas 2.2.
    for col in df.columns:
        try:
            df[col] = pd.to_numeric(df[col])
        except (ValueError, TypeError):
            pass

    print(f"    Found packing units for {len(df)} product-warehouse combinations")
    return df


def get_tier_quantities(product_warehouse_pu_list: list) -> pd.DataFrame:
    """
    Calculate tier quantities based on historical order data.

    The query works on orders from the start of the month four months back
    through yesterday (statuses 7 and 12 excluded, channels 'telesales' and
    'retailer' only), placed by retailers in the listed cohorts. It then:

    1. keeps retailers with at least two orders or two distinct order weeks
       for the product/packing unit,
    2. drops outlier order quantities (1.5 x IQR fence and 3 x stddev),
    3. derives tier 1 and tier 2 quantities from the median, upper percentiles,
       mode, a recency-weighted average (weight = exp(-0.02 * age in days))
       and the last 15 days of orders,
    4. forces tier 2 to be at least 1.6 x tier 1 and at most
       max(3.5 x tier 1, tier 1 + 20).

    Args:
        product_warehouse_pu_list: List of (warehouse_id, product_id, packing_unit_id) tuples

    Returns:
        DataFrame with warehouse_id, product_id, packing_unit_id, tier_1_qty,
        tier_2_qty, median_qty and stddev_qty. An empty frame with the same
        columns is returned for empty input, without querying Snowflake.
    """
    if not product_warehouse_pu_list:
        # Same column set as the query result, so callers see one schema on both paths.
        return pd.DataFrame(columns=['warehouse_id', 'product_id', 'packing_unit_id',
                                     'tier_1_qty', 'tier_2_qty', 'median_qty', 'stddev_qty'])

    # int() casts guarantee that only numeric literals reach the SQL text.
    tuples_str = ','.join([f"({int(w)}, {int(p)}, {int(pu)})" for w, p, pu in product_warehouse_pu_list])

    # NOTE(review): the warehouse_mapping CTE below has no row for El-Marg (38),
    # although WAREHOUSE_TAG_MAPPING and get_warehouse_ticket_stats() both list it,
    # so products in that warehouse never get tier quantities from this query.
    # Confirm whether the omission is intentional.
    query = f'''
    WITH selected_products AS (
        SELECT warehouse_id, product_id, packing_unit_id
        FROM (VALUES {tuples_str}) AS x(warehouse_id, product_id, packing_unit_id)
    ),
    
    base AS (
        SELECT *, ROW_NUMBER() OVER (PARTITION BY retailer_id ORDER BY priority) as rnk 
        FROM (
            SELECT x.*, TAGGABLE_ID as retailer_id 
            FROM (
                SELECT id as cohort_id, name as cohort_name, priority, dynamic_tag_id 
                FROM cohorts 
                WHERE is_active = 'true'
                    AND id IN (700,701,702,703,704,1123,1124,1125,1126)
            ) x 
            JOIN DYNAMIC_TAGgables dt ON x.dynamic_tag_id = dt.dynamic_tag_id
            WHERE dt.taggable_id not IN (
                SELECT taggable_id FROM DYNAMIC_TAGgables 
                WHERE dynamic_tag_id IN (2807, 2808, 2809, 2810, 2811, 2812)
            )
        )
        QUALIFY rnk = 1 
    ),
    
    warehouse_mapping AS (
        SELECT * FROM (VALUES
            ('Cairo', 'Mostorod', 1),
            ('Giza', 'Barageel', 236),
            ('Giza', 'Sakkarah', 962),
            ('Delta West', 'El-Mahala', 337),
            ('Delta West', 'Tanta', 8),
            ('Delta East', 'Mansoura FC', 339),
            ('Delta East', 'Sharqya', 170),
            ('Upper Egypt', 'Assiut FC', 501),
            ('Upper Egypt', 'Bani sweif', 401),
            ('Upper Egypt', 'Menya Samalot', 703),
            ('Upper Egypt', 'Sohag', 632),
            ('Alexandria', 'Khorshed Alex', 797)
        ) x(region_name, wh, warehouse_id)
    ),
    
    raw_order_quantities AS (
        SELECT 
            whs.warehouse_id,
            pso.product_id,
            pso.packing_unit_id,
            so.parent_sales_order_id,
            so.retailer_id,
            so.created_at::date as order_date,
            SUM(pso.purchased_item_count) as order_qty,
            EXP(-0.02 * DATEDIFF('day', so.created_at::date, CURRENT_DATE)) as recency_weight
        FROM product_sales_order pso
        JOIN sales_orders so ON so.id = pso.sales_order_id
        JOIN base ON base.retailer_id = so.retailer_id
        JOIN materialized_views.retailer_polygon ON materialized_views.retailer_polygon.retailer_id = so.retailer_id
        JOIN districts ON districts.id = materialized_views.retailer_polygon.district_id
        JOIN cities ON cities.id = districts.city_id
        JOIN states ON states.id = cities.state_id
        JOIN regions ON regions.id = states.region_id
        JOIN warehouse_mapping whs ON whs.region_name = CASE WHEN regions.id = 2 THEN states.name_en ELSE regions.name_en END
        JOIN selected_products sp ON sp.warehouse_id = whs.warehouse_id 
            AND sp.product_id = pso.product_id
            AND sp.packing_unit_id = pso.packing_unit_id
        WHERE TRUE
            AND so.created_at::date BETWEEN DATE_TRUNC('month', CURRENT_DATE - INTERVAL '4 months') AND CURRENT_DATE - 1
            AND so.sales_order_status_id NOT IN (7, 12)
            AND so.channel IN ('telesales', 'retailer')
            AND pso.purchased_item_count <> 0
        GROUP BY 1, 2, 3, 4, 5, 6
    ),
    
    retailer_frequency AS (
        SELECT 
            warehouse_id, product_id, packing_unit_id, retailer_id,
            COUNT(DISTINCT parent_sales_order_id) as order_count,
            COUNT(DISTINCT DATE_TRUNC('week', order_date)) as weeks_ordered
        FROM raw_order_quantities
        GROUP BY 1, 2, 3, 4
    ),
    
    frequent_buyers AS (
        SELECT warehouse_id, product_id, packing_unit_id, retailer_id
        FROM retailer_frequency
        WHERE order_count >= 2 OR weeks_ordered >= 2
    ),
    
    filtered_orders AS (
        SELECT roq.*
        FROM raw_order_quantities roq
        JOIN frequent_buyers fb ON fb.warehouse_id = roq.warehouse_id
            AND fb.product_id = roq.product_id
            AND fb.packing_unit_id = roq.packing_unit_id
            AND fb.retailer_id = roq.retailer_id
    ),
    
    initial_stats AS (
        SELECT 
            warehouse_id, product_id, packing_unit_id,
            PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY order_qty) as q1,
            PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY order_qty) as q3,
            MEDIAN(order_qty) as median_qty,
            STDDEV_POP(order_qty) as stddev_qty,
            AVG(order_qty) as avg_qty
        FROM filtered_orders
        GROUP BY 1, 2, 3
    ),
    
    cleaned_orders AS (
        SELECT fo.*
        FROM filtered_orders fo
        JOIN initial_stats ist ON ist.warehouse_id = fo.warehouse_id
            AND ist.product_id = fo.product_id
            AND ist.packing_unit_id = fo.packing_unit_id
        WHERE fo.order_qty >= ist.q1 - 1.5 * (ist.q3 - ist.q1)
            AND fo.order_qty <= ist.q3 + 1.5 * (ist.q3 - ist.q1)
            AND (ist.stddev_qty = 0 OR ABS(fo.order_qty - ist.avg_qty) <= 3 * ist.stddev_qty)
    ),
    
    recent_trends AS (
        SELECT 
            warehouse_id, product_id, packing_unit_id,
            SUM(order_qty * recency_weight) / NULLIF(SUM(recency_weight), 0) as weighted_avg_qty,
            AVG(CASE WHEN order_date >= CURRENT_DATE - 15 THEN order_qty END) as last_15d_avg,
            MEDIAN(CASE WHEN order_date >= CURRENT_DATE - 15 THEN order_qty END) as last_15d_median,
            MAX(CASE WHEN order_date >= CURRENT_DATE - 15 THEN order_qty END) as last_15d_max,
            COUNT(CASE WHEN order_date >= CURRENT_DATE - 15 THEN 1 END) as last_15d_orders
        FROM cleaned_orders
        GROUP BY 1, 2, 3
    ),
    
    quantity_stats AS (
        SELECT 
            warehouse_id, product_id, packing_unit_id,
            COUNT(DISTINCT parent_sales_order_id) as total_orders,
            MEDIAN(order_qty) as median_qty,
            STDDEV_POP(order_qty) as stddev_qty,
            PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY order_qty) as q3_qty,
            PERCENTILE_CONT(0.85) WITHIN GROUP (ORDER BY order_qty) as p85_qty,
            PERCENTILE_CONT(0.90) WITHIN GROUP (ORDER BY order_qty) as p90_qty,
            PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY order_qty) as p95_qty
        FROM cleaned_orders
        GROUP BY 1, 2, 3
    ),
    
    most_frequent_qty AS (
        SELECT warehouse_id, product_id, packing_unit_id, order_qty as mode_qty
        FROM (
            SELECT warehouse_id, product_id, packing_unit_id, order_qty,
                   COUNT(*) as freq,
                   ROW_NUMBER() OVER (PARTITION BY warehouse_id, product_id, packing_unit_id ORDER BY COUNT(*) DESC, order_qty DESC) as rn
            FROM cleaned_orders
            GROUP BY 1, 2, 3, 4
        )
        WHERE rn = 1
    ),
    
    tier_calculations AS (
        SELECT 
            qs.warehouse_id, qs.product_id, qs.packing_unit_id,
            qs.median_qty, qs.stddev_qty, qs.q3_qty, qs.p85_qty, qs.p90_qty, qs.p95_qty,
            COALESCE(mf.mode_qty, qs.median_qty) as mode_qty,
            rt.weighted_avg_qty, rt.last_15d_median, rt.last_15d_max, rt.last_15d_orders,
            
            CEIL(GREATEST(
                (0.7 * qs.median_qty + 0.3 * COALESCE(rt.weighted_avg_qty, qs.median_qty)) + 1.0 * COALESCE(qs.stddev_qty, 1),
                qs.q3_qty,
                COALESCE(mf.mode_qty, qs.median_qty) + GREATEST(3, qs.median_qty * 0.3),
                CASE 
                    WHEN rt.last_15d_orders >= 2 AND rt.last_15d_median > qs.median_qty 
                    THEN rt.last_15d_median * 1.1
                    ELSE qs.median_qty * 1.1
                END,
                qs.median_qty + 2
            )) as tier_1_qty,
            
            CEIL(GREATEST(
                qs.q3_qty + 1.5 * COALESCE(qs.stddev_qty, 1),
                qs.p85_qty + 1.0 * COALESCE(qs.stddev_qty, 1),
                qs.p90_qty + 0.5 * COALESCE(qs.stddev_qty, 1),
                qs.p95_qty,
                (0.6 * qs.median_qty + 0.4 * COALESCE(rt.weighted_avg_qty, qs.median_qty)) * 2.0,
                CASE 
                    WHEN rt.last_15d_orders >= 2 AND rt.last_15d_max > qs.p90_qty 
                    THEN rt.last_15d_max * 1.1
                    ELSE qs.median_qty * 1.6
                END
            )) as tier_2_qty_base
            
        FROM quantity_stats qs
        LEFT JOIN most_frequent_qty mf ON mf.warehouse_id = qs.warehouse_id 
            AND mf.product_id = qs.product_id AND mf.packing_unit_id = qs.packing_unit_id
        LEFT JOIN recent_trends rt ON rt.warehouse_id = qs.warehouse_id
            AND rt.product_id = qs.product_id AND rt.packing_unit_id = qs.packing_unit_id
    )
    
    SELECT 
        warehouse_id, product_id, packing_unit_id,
        tier_1_qty,
        LEAST(
            CEIL(GREATEST(tier_2_qty_base, tier_1_qty * 1.6)),
            GREATEST(tier_1_qty * 3.5, tier_1_qty + 20)
        ) as tier_2_qty,
        median_qty, stddev_qty
    FROM tier_calculations
    '''

    print("  Calculating tier quantities from order history...")
    df = query_snowflake(query)

    # Best-effort numeric conversion: columns that cannot be parsed are left
    # untouched. This replaces errors='ignore', deprecated since pandas 2.2.
    for col in df.columns:
        try:
            df[col] = pd.to_numeric(df[col])
        except (ValueError, TypeError):
            pass

    print(f"    Calculated tiers for {len(df)} product-warehouse combinations")
    return df


def get_warehouse_ticket_stats() -> pd.DataFrame:
    """Get warehouse-level ticket size statistics for wholesale calculations.

    Ticket size and order weight are computed per parent sales order, over
    orders from the start of the month four months back through yesterday
    (statuses 7 and 12 excluded, channels 'telesales' and 'retailer' only,
    retailers in the listed cohorts). They are then aggregated per warehouse.

    Returns:
        DataFrame with warehouse_id, warehouse_name, avg_ticket_size,
        median_ticket_size, avg_order_weight_kg and orders_per_car_by_weight.
        The last column is WS_CAR_CAPACITY_TONS * 1000 divided by the average
        order weight, and is NULL when that average is zero.
    """
    # NOTE(review): order weight is computed as pup.weight / 1000 and labelled kg,
    # so packing_unit_products.weight is presumably stored in grams - confirm.
    query = f'''
    WITH base AS (
        SELECT *, ROW_NUMBER() OVER (PARTITION BY retailer_id ORDER BY priority) as rnk 
        FROM (
            SELECT x.*, TAGGABLE_ID as retailer_id 
            FROM (
                SELECT id as cohort_id, name as cohort_name, priority, dynamic_tag_id 
                FROM cohorts 
                WHERE is_active = 'true'
                    AND id IN (700,701,702,703,704,1123,1124,1125,1126)
            ) x 
            JOIN DYNAMIC_TAGgables dt ON x.dynamic_tag_id = dt.dynamic_tag_id
            WHERE dt.taggable_id not IN (
                SELECT taggable_id FROM DYNAMIC_TAGgables 
                WHERE dynamic_tag_id IN (2807, 2808, 2809, 2810, 2811, 2812)
            )
        )
        QUALIFY rnk = 1 
    ),

    whs AS (
        SELECT * FROM (VALUES
            ('Cairo', 'El-Marg', 38),
            ('Cairo', 'Mostorod', 1),
            ('Giza', 'Barageel', 236),
            ('Giza', 'Sakkarah', 962),
            ('Delta West', 'El-Mahala', 337),
            ('Delta West', 'Tanta', 8),
            ('Delta East', 'Mansoura FC', 339),
            ('Delta East', 'Sharqya', 170),
            ('Upper Egypt', 'Assiut FC', 501),
            ('Upper Egypt', 'Bani sweif', 401),
            ('Upper Egypt', 'Menya Samalot', 703),
            ('Upper Egypt', 'Sohag', 632),
            ('Alexandria', 'Khorshed Alex', 797)
        ) x(region_name, wh, warehouse_id)
    ),

    ticket_sizes AS (
        SELECT 
            whs.warehouse_id,
            whs.wh as warehouse_name,
            so.parent_sales_order_id,
            so.retailer_id,
            SUM(pso.total_price) as ticket_size,
            SUM(pso.purchased_item_count * pup.weight / 1000) as order_weight_kg
        FROM product_sales_order pso
        JOIN sales_orders so ON so.id = pso.sales_order_id
        JOIN base ON base.retailer_id = so.retailer_id
        JOIN packing_unit_products pup ON pup.product_id = pso.product_id 
            AND pup.packing_unit_id = pso.packing_unit_id
        JOIN materialized_views.retailer_polygon rp ON rp.retailer_id = so.retailer_id
        JOIN districts ON districts.id = rp.district_id
        JOIN cities ON cities.id = districts.city_id
        JOIN states ON states.id = cities.state_id
        JOIN regions ON regions.id = states.region_id
        JOIN whs ON whs.region_name = CASE WHEN regions.id = 2 THEN states.name_en ELSE regions.name_en END
        WHERE so.created_at::date BETWEEN DATE_TRUNC('month', CURRENT_DATE - INTERVAL '4 months') AND CURRENT_DATE - 1
            AND so.sales_order_status_id NOT IN (7, 12)
            AND so.channel IN ('telesales', 'retailer')
            AND pso.purchased_item_count > 0
        GROUP BY whs.warehouse_id, whs.wh, so.parent_sales_order_id, so.retailer_id
    ),

    warehouse_stats AS (
        SELECT 
            warehouse_id,
            warehouse_name,
            COUNT(DISTINCT parent_sales_order_id) as total_orders,
            COUNT(DISTINCT retailer_id) as total_retailers,
            AVG(ticket_size) as avg_ticket_size,
            MEDIAN(ticket_size) as median_ticket_size,
            AVG(order_weight_kg) as avg_order_weight_kg
        FROM ticket_sizes
        WHERE ticket_size > 0
        GROUP BY warehouse_id, warehouse_name
    )

    SELECT 
        warehouse_id,
        warehouse_name,
        ROUND(avg_ticket_size, 2) as avg_ticket_size,
        ROUND(median_ticket_size, 2) as median_ticket_size,
        ROUND(avg_order_weight_kg, 2) as avg_order_weight_kg,
        ROUND({WS_CAR_CAPACITY_TONS * 1000} / NULLIF(avg_order_weight_kg, 0), 1) as orders_per_car_by_weight
    FROM warehouse_stats
    ORDER BY warehouse_id
    '''

    print("  Fetching warehouse ticket statistics...")
    df = query_snowflake(query)

    # Best-effort numeric conversion: columns that cannot be parsed (for example
    # warehouse_name) are left untouched. This replaces errors='ignore',
    # deprecated since pandas 2.2.
    for col in df.columns:
        try:
            df[col] = pd.to_numeric(df[col])
        except (ValueError, TypeError):
            pass

    print(f"    Got stats for {len(df)} warehouses")
    return df

print("✓ Data fetching functions defined")


In [None]:
# =============================================================================
# V2 TIER PRICE CALCULATION (with margin-based fallback)
# =============================================================================

def calculate_tier_prices_v2(row) -> pd.Series:
    """
    Calculate tier 1 and tier 2 prices for a single row.

    Candidate prices are derived from the row's market-margin and margin-tier
    columns as ``wac / (1 - margin)``. A candidate is kept only when it lies
    between the MAX_DISCOUNT_PCT and MIN_DISCOUNT_PCT discount prices and is
    above WAC. Tier 1 is the higher price (smaller discount), tier 2 the lower.

    V2 CHANGES:
    - Added margin-based fallback when fewer than two candidate prices survive
    - Fallback: T1 keeps FALLBACK_T1_MARGIN_KEEP and T2 keeps
      FALLBACK_T2_MARGIN_KEEP of the current margin (constants are defined
      elsewhere in the notebook)

    Args:
        row: DataFrame row (anything exposing ``.get``) providing
            ``packing_unit_price`` (current price), ``wac_pu`` (cost),
            ``tier_1_qty`` / ``tier_2_qty`` (used only for the elasticity
            adjustment), the market margin columns (``below_market`` ...
            ``above_market``) and the margin tier columns
            (``margin_tier_1`` ... ``margin_tier_above_2``).

    Returns:
        Series with tier_1_price, tier_2_price, price_source.
        On failure both prices are NaN and price_source names the reason:
        'invalid_current_price', 'invalid_wac', 'current_price_below_wac',
        'fallback_failed', 'insufficient_margin_for_fallback',
        'no_valid_pair', 'invalid_ordering' or 'cannot_adjust_elasticity'.
        On success price_source is 'margin_fallback',
        'margin_fallback_adjusted', 'top_two_prices' or
        '<source>_<source>' (each 'market' or 'margin_tier'), optionally
        suffixed with '_elasticity_adj' and/or '_ratio_capped'.
    """
    current_price = row.get('packing_unit_price')
    wac = row.get('wac_pu')
    tier_1_qty = row.get('tier_1_qty')
    tier_2_qty = row.get('tier_2_qty')

    # Validation: a discount can only be built on a positive price above a positive cost
    if pd.isna(current_price) or current_price <= 0:
        return pd.Series({'tier_1_price': np.nan, 'tier_2_price': np.nan, 'price_source': 'invalid_current_price'})

    if pd.isna(wac) or wac <= 0:
        return pd.Series({'tier_1_price': np.nan, 'tier_2_price': np.nan, 'price_source': 'invalid_wac'})

    # Note: equality (price == WAC) is also rejected here
    if current_price <= wac:
        return pd.Series({'tier_1_price': np.nan, 'tier_2_price': np.nan, 'price_source': 'current_price_below_wac'})

    # Calculate discount bounds.
    # max_discount_price is the LOWEST allowed price (deepest discount),
    # min_discount_price is the HIGHEST allowed price (shallowest discount).
    max_discount_price = current_price * (1 - MAX_DISCOUNT_PCT / 100)
    min_discount_price = current_price * (1 - MIN_DISCOUNT_PCT / 100)

    # Collect candidate prices from market margins and margin tiers
    # as (source_type, source_column, price) tuples
    candidate_prices = []

    # Market margin columns
    market_margin_cols = ['below_market', 'market_min', 'market_25', 'market_50', 
                          'market_75', 'market_max', 'above_market']

    for col in market_margin_cols:
        margin = row.get(col)
        # Margins are fractions; values outside (0, 1) are ignored
        if pd.notna(margin) and 0 < margin < 1:
            price = wac / (1 - margin)
            if max_discount_price <= price <= min_discount_price and price > wac:
                candidate_prices.append(('market', col, price))

    # Margin tier columns
    margin_tier_cols = ['margin_tier_1', 'margin_tier_2', 'margin_tier_3', 'margin_tier_4',
                        'margin_tier_5', 'margin_tier_above_1', 'margin_tier_above_2']

    for col in margin_tier_cols:
        margin = row.get(col)
        if pd.notna(margin) and 0 < margin < 1:
            price = wac / (1 - margin)
            if max_discount_price <= price <= min_discount_price and price > wac:
                candidate_prices.append(('margin_tier', col, price))

    # Remove duplicates (after rounding to 2 decimals; the first source seen
    # for a given rounded price wins) and sort by price descending
    unique_prices = {}
    for source_type, source_col, price in candidate_prices:
        price_rounded = round(price, 2)
        if price_rounded not in unique_prices:
            unique_prices[price_rounded] = (source_type, source_col)

    valid_prices = sorted(unique_prices.keys(), reverse=True)

    tier_1 = None
    tier_2 = None
    source = ''

    # =========================================================================
    # V2 FALLBACK: Use margin-based pricing if fewer than two valid prices found
    # (a single surviving candidate is not used)
    # =========================================================================
    if len(valid_prices) < 2:
        # Calculate current margin
        current_margin = (current_price - wac) / current_price

        # Only attempt the fallback when the margin exceeds the minimum discount
        if current_margin > MIN_DISCOUNT_PCT / 100:
            # T1: keep the FALLBACK_T1_MARGIN_KEEP share of the current margin
            t1_margin = current_margin * FALLBACK_T1_MARGIN_KEEP
            tier_1 = wac / (1 - t1_margin)

            # T2: keep the FALLBACK_T2_MARGIN_KEEP share of the current margin
            t2_margin = current_margin * FALLBACK_T2_MARGIN_KEEP
            tier_2 = wac / (1 - t2_margin)

            # Validate within bounds
            tier_1_valid = max_discount_price <= tier_1 <= min_discount_price and tier_1 > wac
            tier_2_valid = max_discount_price <= tier_2 <= min_discount_price and tier_2 > wac

            if tier_1_valid and tier_2_valid and tier_2 < tier_1:
                source = 'margin_fallback'
            else:
                # Try with adjusted bounds: tier_1 is clamped into the discount
                # window on both sides, tier_2 is only raised to the lower bound
                tier_1 = max(tier_1, max_discount_price) if tier_1 < max_discount_price else min(tier_1, min_discount_price)
                tier_2 = max(tier_2, max_discount_price) if tier_2 < max_discount_price else tier_2

                if tier_2 < tier_1 and tier_1 > wac and tier_2 > wac:
                    source = 'margin_fallback_adjusted'
                else:
                    return pd.Series({'tier_1_price': np.nan, 'tier_2_price': np.nan, 
                                     'price_source': 'fallback_failed'})
        else:
            return pd.Series({'tier_1_price': np.nan, 'tier_2_price': np.nan, 
                             'price_source': 'insufficient_margin_for_fallback'})
    else:
        # Strategy: take the first (highest) pair whose lower price is more
        # than MIN_GAP_PCT percent below the higher one
        for i, p1 in enumerate(valid_prices):
            for p2 in valid_prices[i+1:]:
                if p2 < p1 * (1 - MIN_GAP_PCT / 100):
                    tier_1 = p1
                    tier_2 = p2
                    source = f"{unique_prices[p1][0]}_{unique_prices[p2][0]}"
                    break
            if tier_1 is not None:
                break

        # If no pair with minimum gap, take top two
        if tier_1 is None and len(valid_prices) >= 2:
            tier_1 = valid_prices[0]
            tier_2 = valid_prices[1]
            source = 'top_two_prices'

    # Final validation (defensive: every path above either sets both tiers or returns)
    if tier_1 is None or tier_2 is None:
        return pd.Series({'tier_1_price': np.nan, 'tier_2_price': np.nan, 'price_source': 'no_valid_pair'})

    # Ensure correct ordering (equal prices stay equal and are rejected below)
    if tier_2 >= tier_1:
        tier_1, tier_2 = max(tier_1, tier_2), min(tier_1, tier_2)

    # Check basic constraints
    if not (wac < tier_2 < tier_1 < current_price):
        return pd.Series({'tier_1_price': np.nan, 'tier_2_price': np.nan, 'price_source': 'invalid_ordering'})

    # =========================================================================
    # ELASTICITY RATIO ADJUSTMENT (T1/T2 ONLY - T3 exempt)
    # Ensure: qty_ratio < discount_ratio
    # Skipped entirely when either tier quantity is missing or tier_1_qty <= 0.
    # =========================================================================
    if pd.notna(tier_1_qty) and pd.notna(tier_2_qty) and tier_1_qty > 0:
        # Discounts here are absolute amounts per unit, not percentages
        tier_1_discount = current_price - tier_1
        tier_2_discount = current_price - tier_2

        if tier_1_discount > 0:
            qty_ratio = tier_2_qty / tier_1_qty
            discount_ratio = tier_2_discount / tier_1_discount

            if qty_ratio > 0:
                # V2: Ensure qty_ratio < discount_ratio (elasticity > 1)
                if discount_ratio <= qty_ratio:
                    # Need to increase T2 discount so that
                    # discount_ratio == qty_ratio * MIN_RATIO
                    target_discount_ratio = qty_ratio * MIN_RATIO
                    target_tier_2_discount = target_discount_ratio * tier_1_discount
                    adjusted_tier_2 = current_price - target_tier_2_discount

                    # NOTE(review): the adjusted price is only checked against WAC
                    # and tier_1, not against max_discount_price - confirm intended
                    if adjusted_tier_2 > wac and adjusted_tier_2 < tier_1:
                        tier_2 = round(adjusted_tier_2, 2)
                        source += '_elasticity_adj'
                    else:
                        return pd.Series({'tier_1_price': np.nan, 'tier_2_price': np.nan, 
                                         'price_source': f'cannot_adjust_elasticity'})

                # Also cap at MAX_RATIO
                # NOTE(review): discount_ratio is the value computed BEFORE the
                # adjustment above, not the ratio implied by the adjusted tier_2
                elasticity_ratio = discount_ratio / qty_ratio
                if elasticity_ratio > MAX_RATIO:
                    target_discount_ratio = MAX_RATIO * qty_ratio
                    target_tier_2_discount = target_discount_ratio * tier_1_discount
                    adjusted_tier_2 = current_price - target_tier_2_discount

                    # If the capped price is out of range the cap is silently skipped
                    if adjusted_tier_2 > wac and adjusted_tier_2 < tier_1:
                        tier_2 = round(adjusted_tier_2, 2)
                        source += '_ratio_capped'

    # Final rounding
    tier_1 = round(tier_1, 2)
    tier_2 = round(tier_2, 2)

    return pd.Series({
        'tier_1_price': tier_1,
        'tier_2_price': tier_2,
        'price_source': source
    })


def calculate_wholesale_tier_v2(row):
    """
    Derive the wholesale (Tier 3) offer for one SKU from delivery cost savings.

    For every multiplier from WS_MIN_MULTIPLIER up to the number of orders that
    fit in one car, the delivery cost saved by consolidating that many average
    tickets into one order is passed on as a per-unit discount. The scenario
    with the highest savings percentage that respects the minimum margin, the
    maximum ticket size and the T2/T1 price ceiling wins.

    V2 CHANGES:
    - The multiplier search starts at WS_MIN_MULTIPLIER
    - If no delivery-based scenario qualifies, a margin-based price is tried
      (keeps FALLBACK_T3_MARGIN_KEEP of the current margin)

    Args:
        row: DataFrame row with ``packing_unit_price``, ``wac_pu``,
            ``avg_ticket_size``, ``orders_per_car_by_weight``,
            ``tier_1_price`` / ``tier_2_price`` and, for the fallback,
            ``tier_1_qty`` / ``tier_2_qty``.

    Returns:
        Series with ws_qty, ws_price, ws_discount_pct, ws_margin,
        ws_multiplier and ws_savings_pct (all NaN when no offer is possible;
        ws_multiplier is 0 for the margin-based fallback).
    """
    result_keys = ('ws_qty', 'ws_price', 'ws_discount_pct',
                   'ws_margin', 'ws_multiplier', 'ws_savings_pct')

    unit_price = row.get('packing_unit_price')
    unit_cost = row.get('wac_pu')
    ticket_size = row.get('avg_ticket_size', 4000)
    t1_price = row.get('tier_1_price')
    t2_price = row.get('tier_2_price')

    # Missing or non-positive warehouse stats fall back to fixed defaults
    car_orders = row.get('orders_per_car_by_weight', 15)
    if pd.isna(car_orders) or car_orders <= 0:
        car_orders = 15

    delivery_cost_per_order = WS_CAR_COST / car_orders

    if pd.isna(ticket_size) or ticket_size <= 0:
        ticket_size = 4000

    # Without a usable price and cost there is nothing to compute
    if pd.isna(unit_price) or pd.isna(unit_cost) or unit_price <= 0 or unit_cost <= 0:
        return pd.Series(dict.fromkeys(result_keys, np.nan))

    # The wholesale price must undercut T2 when available, otherwise T1
    price_ceiling = None
    for tier_price in (t2_price, t1_price):
        if pd.notna(tier_price) and tier_price > 0:
            price_ceiling = tier_price
            break

    # Lowest price that still leaves the WS_MIN_MARGIN margin
    price_floor = unit_cost / (1 - WS_MIN_MARGIN)

    chosen = None
    top_savings_pct = 0

    # V2: Start from WS_MIN_MULTIPLIER
    for multiplier in range(WS_MIN_MULTIPLIER, int(car_orders) + 1):
        basket_value = ticket_size * multiplier
        savings_total = (multiplier - 1) * delivery_cost_per_order
        units = basket_value / unit_price

        if units <= 0:
            continue

        unit_discount = savings_total / units
        candidate_price = unit_price - unit_discount

        under_ceiling = price_ceiling is None or candidate_price < price_ceiling
        if candidate_price >= price_floor and basket_value <= WS_MAX_TICKET_SIZE and under_ceiling:
            margin = (candidate_price - unit_cost) / candidate_price
            savings_pct = (unit_discount / unit_price) * 100

            # Keep only the scenario with the largest savings percentage
            if savings_pct > top_savings_pct:
                top_savings_pct = savings_pct
                chosen = {
                    'ws_qty': round(units, 0),
                    'ws_price': round(candidate_price, 2),
                    'ws_discount_pct': round((unit_price - candidate_price) / unit_price * 100, 2),
                    'ws_margin': round(margin, 4),
                    'ws_multiplier': multiplier,
                    'ws_savings_pct': round(savings_pct, 2)
                }

    def _margin_fallback():
        """Build the margin-based scenario, or return None if it does not qualify."""
        margin_now = (unit_price - unit_cost) / unit_price
        if not (margin_now > MIN_DISCOUNT_PCT / 100):
            return None

        # T3 keeps the FALLBACK_T3_MARGIN_KEEP share of the current margin
        kept_margin = margin_now * FALLBACK_T3_MARGIN_KEEP
        price = unit_cost / (1 - kept_margin)

        # Quantity: T2 qty * 2, else T1 qty * 3, else a fixed default
        t2_qty = row.get('tier_2_qty')
        t1_qty = row.get('tier_1_qty')
        if pd.notna(t2_qty) and t2_qty > 0:
            qty = t2_qty * 2
        elif pd.notna(t1_qty) and t1_qty > 0:
            qty = t1_qty * 3
        else:
            qty = 50  # Default

        # Price must sit below T2/T1 and above cost
        under_ceiling = price_ceiling is None or price < price_ceiling
        if not (under_ceiling and price > unit_cost):
            return None

        margin = (price - unit_cost) / price
        discount_pct = (unit_price - price) / unit_price * 100
        if not (MIN_DISCOUNT_PCT <= discount_pct <= MAX_DISCOUNT_PCT):
            return None

        return {
            'ws_qty': int(qty),
            'ws_price': round(price, 2),
            'ws_discount_pct': round(discount_pct, 2),
            'ws_margin': round(margin, 4),
            'ws_multiplier': 0,  # Indicates fallback
            'ws_savings_pct': round(discount_pct, 2)
        }

    # V2 FALLBACK: If no valid scenario from delivery calc, use margin-based
    if chosen is None and unit_price > unit_cost:
        chosen = _margin_fallback()

    if chosen:
        return pd.Series(chosen)
    return pd.Series(dict.fromkeys(result_keys, np.nan))

# Progress marker: printed once the tier price functions in this cell have been defined.
print("✓ V2 Tier price calculation functions defined")


In [None]:
# =============================================================================
# V2 PERFORMANCE ADJUSTMENT (Sequential: Qty first, then Discount)
# =============================================================================

def get_tier_thresholds(tier: int) -> tuple:
    """Return the (low, high) conversion thresholds for ``tier``.

    Tiers 1 and 2 use their own constants; any other value is treated as
    Tier 3.
    """
    if tier == 1:
        low, high = T1_LOW_CONVERSION_THRESHOLD, T1_HIGH_CONVERSION_THRESHOLD
    elif tier == 2:
        low, high = T2_LOW_CONVERSION_THRESHOLD, T2_HIGH_CONVERSION_THRESHOLD
    else:
        # Everything that is not tier 1 or 2 falls back to the Tier 3 pair
        low, high = T3_LOW_CONVERSION_THRESHOLD, T3_HIGH_CONVERSION_THRESHOLD
    return low, high


def _count_or_zero(value) -> int:
    """Coerce a tracking counter to int, treating None/NaN (no history yet) as 0."""
    if pd.isna(value):
        return 0
    return int(value)


def adjust_tier_by_performance(row, tier: int, 
                               qty_col: str, disc_col: str, contribution_col: str,
                               current_price_col: str = 'packing_unit_price') -> dict:
    """
    Adjust a single tier's qty/discount based on its conversion performance.

    V2 SEQUENTIAL LOGIC (only applied when the row has an active QD):
    1. Conversion >= HIGH threshold: multiply the discount by
       HIGH_CONVERSION_MULTIPLIER, unless that would fall below
       MIN_DISCOUNT_PCT.
    2. Conversion < LOW threshold and qty_lower_count < MAX_QTY_LOWER_COUNT:
       lower qty by max(QTY_REDUCTION_MIN, int(qty * QTY_REDUCTION_PCT)),
       provided the result stays >= 2.
    3. Otherwise, if disc_boost_count < MAX_DISC_BOOST_COUNT: multiply the
       discount by ZERO_CONVERSION_MULTIPLIER (contribution == 0) or
       LOW_CONVERSION_MULTIPLIER, capped at MAX_DISCOUNT_PCT.
    4. Otherwise hold ('low_conv_exhausted').

    Missing values (None or NaN) in the contribution column and in the
    ``t{tier}_qty_lower_count`` / ``t{tier}_disc_boost_count`` tracking columns
    are treated as 0. NaN is truthy, so a plain ``value or 0`` would let NaN
    through; NaN then fails every comparison, which misreports an untouched
    tier as 'low_conv_exhausted' or 'normal_keep'.

    Args:
        row: DataFrame row with tier data and performance columns
        tier: Tier number (1, 2, or 3)
        qty_col: Column name for tier quantity
        disc_col: Column name for tier discount percentage
        contribution_col: Column name for tier contribution (conversion)
        current_price_col: Column name for current price. Accepted for
            backward compatibility; the adjustment logic does not use it.

    Returns:
        dict with keys 'qty', 'disc_pct', 'qty_lower_count',
        'disc_boost_count' and 'action' (a label describing what was done).
    """
    qty = row.get(qty_col)
    disc_pct = row.get(disc_col)
    has_active_qd = row.get('has_active_qd', False)

    # NaN-safe default: missing contribution counts as zero conversion
    raw_contribution = row.get(contribution_col, 0)
    contribution = 0 if pd.isna(raw_contribution) else (raw_contribution or 0)

    # Get tracking counts (default 0 for new QDs, including NaN from joins).
    # Coercing to int also keeps action labels free of float suffixes ("_2.0").
    qty_lower_count = _count_or_zero(row.get(f't{tier}_qty_lower_count', 0))
    disc_boost_count = _count_or_zero(row.get(f't{tier}_disc_boost_count', 0))

    result = {
        'qty': qty,
        'disc_pct': disc_pct,
        'qty_lower_count': qty_lower_count,
        'disc_boost_count': disc_boost_count,
        'action': 'none'
    }

    # Skip if tier not valid
    if pd.isna(qty) or pd.isna(disc_pct) or qty <= 0 or disc_pct <= 0:
        result['action'] = 'invalid_tier'
        return result

    # Skip adjustment if no existing QD (use calculated values)
    if not has_active_qd:
        result['action'] = 'new_qd'
        return result

    # Thresholds are only needed once an adjustment is actually considered
    low_threshold, high_threshold = get_tier_thresholds(tier)

    # ==========================================================================
    # PERFORMANCE-BASED ADJUSTMENT
    # ==========================================================================

    # CASE 1: HIGH CONVERSION - Reduce discount to save margin
    if contribution >= high_threshold:
        new_disc = disc_pct * HIGH_CONVERSION_MULTIPLIER

        # Ensure discount stays above minimum
        if new_disc >= MIN_DISCOUNT_PCT:
            result['disc_pct'] = round(new_disc, 2)
            result['action'] = 'high_conversion_reduce'
        else:
            result['action'] = 'high_conversion_skip_below_min'

        return result

    # CASE 2: LOW/ZERO CONVERSION - Sequential adjustment
    if contribution < low_threshold:

        # STEP 1: Try lowering quantity first (bounded by MAX_QTY_LOWER_COUNT)
        if qty_lower_count < MAX_QTY_LOWER_COUNT:
            reduction = max(QTY_REDUCTION_MIN, int(qty * QTY_REDUCTION_PCT))
            new_qty = qty - reduction

            # Minimum qty of 2; if the reduction would go below it, fall
            # through to the discount boost instead
            if new_qty >= 2:
                result['qty'] = int(new_qty)
                result['qty_lower_count'] = qty_lower_count + 1
                result['action'] = f'low_conv_lower_qty_{qty_lower_count + 1}'
                return result

        # STEP 2: If qty exhausted, try boosting discount (bounded by MAX_DISC_BOOST_COUNT)
        if disc_boost_count < MAX_DISC_BOOST_COUNT:
            # Use different multiplier for zero vs low
            if contribution == 0:
                multiplier = ZERO_CONVERSION_MULTIPLIER
            else:
                multiplier = LOW_CONVERSION_MULTIPLIER

            new_disc = disc_pct * multiplier

            # Cap at maximum discount
            if new_disc <= MAX_DISCOUNT_PCT:
                result['disc_pct'] = round(new_disc, 2)
                result['disc_boost_count'] = disc_boost_count + 1
                result['action'] = f'low_conv_boost_disc_{disc_boost_count + 1}'
            else:
                # Cap at max and still increment
                result['disc_pct'] = MAX_DISCOUNT_PCT
                result['disc_boost_count'] = disc_boost_count + 1
                result['action'] = f'low_conv_boost_disc_{disc_boost_count + 1}_capped'

            return result

        # STEP 3: Both exhausted - hold
        result['action'] = 'low_conv_exhausted'
        return result

    # CASE 3: NORMAL CONVERSION - Keep as-is
    result['action'] = 'normal_keep'
    return result


def apply_performance_adjustments(df: pd.DataFrame) -> pd.DataFrame:
    """
    Apply performance adjustments to all tiers for all SKUs.

    For each tier, ``adjust_tier_by_performance`` is run row by row and its
    result is written back to the tier's qty / discount columns, the
    ``t{n}_qty_lower_count`` / ``t{n}_disc_boost_count`` tracking columns and
    a new ``t{n}_action`` column. A per-tier count of actions is printed.

    An empty DataFrame is handled explicitly: ``DataFrame.apply`` with
    ``result_type='expand'`` returns a copy of an empty frame instead of the
    expanded result, so the 'qty' lookup would raise KeyError. In that case
    only the (empty) tracking and action columns are added.

    Args:
        df: DataFrame with tier data and contribution columns
            (``tier_1_qty``, ``tier_1_disc_pct``, ``t1_cntrb_uth`` and the
            tier 2 / wholesale equivalents). The input is not modified.

    Returns:
        Copy of ``df`` with adjusted tiers
    """
    df = df.copy()

    # (tier number, quantity column, discount column); tier 3 is the wholesale tier
    tier_columns = (
        (1, 'tier_1_qty', 'tier_1_disc_pct'),
        (2, 'tier_2_qty', 'tier_2_disc_pct'),
        (3, 'ws_qty', 'ws_discount_pct'),
    )

    # Initialize tracking columns if not present
    for tier, _qty_col, _disc_col in tier_columns:
        for counter_col in (f't{tier}_qty_lower_count', f't{tier}_disc_boost_count'):
            if counter_col not in df.columns:
                df[counter_col] = 0

    for tier, qty_col, disc_col in tier_columns:
        # Only the first progress line is preceded by a blank line
        leading = "\n" if tier == 1 else ""
        print(f"{leading}  Adjusting Tier {tier}...")

        action_col = f't{tier}_action'

        if df.empty:
            # Nothing to adjust; still provide the action column callers expect
            df[action_col] = pd.Series(index=df.index, dtype='object')
            continue

        adjustments = df.apply(
            lambda row: adjust_tier_by_performance(
                row, tier=tier,
                qty_col=qty_col,
                disc_col=disc_col,
                contribution_col=f't{tier}_cntrb_uth'
            ), axis=1, result_type='expand'
        )

        df[qty_col] = adjustments['qty']
        df[disc_col] = adjustments['disc_pct']
        df[f't{tier}_qty_lower_count'] = adjustments['qty_lower_count']
        df[f't{tier}_disc_boost_count'] = adjustments['disc_boost_count']
        df[action_col] = adjustments['action']

    # Summary
    for tier, _qty_col, _disc_col in tier_columns:
        actions = df[f't{tier}_action'].value_counts()
        print(f"\n  Tier {tier} adjustment summary:")
        for action, count in actions.items():
            print(f"    {action}: {count}")

    return df


def recalculate_prices_from_discounts(df: pd.DataFrame) -> pd.DataFrame:
    """
    Rebuild the tier prices from their (possibly adjusted) discount percentages.

    For each of T1, T2 and wholesale, rows with a non-null, positive discount
    get ``price = packing_unit_price * (1 - disc_pct / 100)`` rounded to two
    decimals. Rows without a usable discount keep whatever price they had.

    Args:
        df: DataFrame with packing_unit_price and tier disc_pct columns

    Returns:
        Copy of ``df`` with recalculated prices
    """
    repriced = df.copy()

    # (discount percentage column, price column it drives)
    discount_to_price = (
        ('tier_1_disc_pct', 'tier_1_price'),
        ('tier_2_disc_pct', 'tier_2_price'),
        ('ws_discount_pct', 'ws_price'),
    )

    for disc_col, price_col in discount_to_price:
        discount = repriced[disc_col]
        has_discount = discount.notna() & (discount > 0)
        base_price = repriced.loc[has_discount, 'packing_unit_price']
        discounted = base_price * (1 - repriced.loc[has_discount, disc_col] / 100)
        repriced.loc[has_discount, price_col] = discounted.round(2)

    return repriced

# Progress marker: printed once the performance adjustment functions in this cell have been defined.
print("✓ V2 Performance adjustment functions defined")


In [None]:
# =============================================================================
# HELPER FUNCTIONS
# =============================================================================

def parse_keep_qd_tiers(value):
    """
    Normalise a ``keep_qd_tiers`` cell into a list of tier labels.

    Accepted inputs:
    - list: returned unchanged
    - tuple / set / frozenset / numpy array: converted to a list (array
      columns are common after a parquet round-trip)
    - str: parsed with ``ast.literal_eval``; a list/tuple/set literal becomes
      a list, a quoted single label such as ``"'T1'"`` becomes ``['T1']``
    - anything else (None, NaN, numbers, unparsable or non-sequence
      literals): ``[]``

    The function always returns a list, so callers can safely use ``in`` and
    truthiness checks on the result.

    Args:
        value: Raw cell value of the ``keep_qd_tiers`` column.

    Returns:
        list of tier labels; empty when nothing usable was supplied.
    """
    if isinstance(value, list):
        return value

    if isinstance(value, np.ndarray):
        # ravel() also copes with 0-d arrays; tolist() yields plain Python values
        return value.ravel().tolist()

    if isinstance(value, (tuple, set, frozenset)):
        return list(value)

    if isinstance(value, str):
        try:
            parsed = ast.literal_eval(value)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            # Not a Python literal (e.g. free text or a truncated list)
            return []

        if isinstance(parsed, (list, tuple, set)):
            return list(parsed)
        if isinstance(parsed, str):
            return [parsed]
        # Numbers, dicts, None, ... carry no tier labels
        return []

    # None, NaN and any other scalar
    return []


def validate_tier_ordering(df: pd.DataFrame) -> pd.DataFrame:
    """
    Enforce strictly increasing discounts: discount_T1 < discount_T2 < discount_T3.

    Three checks run in order, each on the frame as left by the previous one:
    T1 vs T2 (clears T2), T2 vs T3 (clears T3) and T1 vs T3 (clears T3, which
    covers rows where T2 is missing or was just cleared). "Clearing" a tier
    sets its qty, price and discount columns to NaN. Rows where either side of
    a comparison is null are left alone.

    Returns a modified copy; the input frame is not changed.
    """
    checked = df.copy()

    print("\n  Validating discount ordering (T1 < T2 < T3)...")

    t2_fields = ['tier_2_qty', 'tier_2_price', 'tier_2_disc_pct']
    t3_fields = ['ws_qty', 'ws_price', 'ws_discount_pct']

    # (lower-tier discount, higher-tier discount, columns to clear, label, reason)
    ordering_rules = (
        ('tier_1_disc_pct', 'tier_2_disc_pct', t2_fields, 'T2', 'T1 >= T2 discount'),
        ('tier_2_disc_pct', 'ws_discount_pct', t3_fields, 'T3', 'T2 >= T3 discount'),
        ('tier_1_disc_pct', 'ws_discount_pct', t3_fields, 'T3', 'T1 >= T3 discount'),
    )

    for lower_col, higher_col, fields_to_clear, label, reason in ordering_rules:
        lower = checked[lower_col]
        higher = checked[higher_col]
        out_of_order = lower.notna() & higher.notna() & (lower >= higher)
        offenders = out_of_order.sum()

        if offenders > 0:
            checked.loc[out_of_order, fields_to_clear] = np.nan
            print(f"    Invalidated {label} for {offenders} SKUs ({reason})")

    return checked


def validate_elasticity_t1_t2(df: pd.DataFrame) -> pd.DataFrame:
    """
    Re-check the T1/T2 elasticity constraint on discount percentages (T3 exempt).

    Constraint: tier_2_qty / tier_1_qty < tier_2_disc_pct / tier_1_disc_pct.
    Rows violating it get their T2 discount raised so that the discount ratio
    equals qty_ratio * MIN_RATIO, and the T2 price is recomputed from
    packing_unit_price. If the raised discount would exceed MAX_DISCOUNT_PCT,
    T2 (qty, price, discount) is set to NaN instead.

    Only rows with all four T1/T2 qty and discount values present, and
    positive T1 qty and discount, are examined. Returns a modified copy.
    """
    result = df.copy()

    print("\n  Validating T1/T2 elasticity...")

    t1_qty_col = result['tier_1_qty']
    t1_disc_col = result['tier_1_disc_pct']
    comparable = (
        t1_qty_col.notna() & result['tier_2_qty'].notna()
        & t1_disc_col.notna() & result['tier_2_disc_pct'].notna()
        & (t1_qty_col > 0) & (t1_disc_col > 0)
    )

    t2_fields = ['tier_2_qty', 'tier_2_price', 'tier_2_disc_pct']
    invalidated = 0

    for row_label in result[comparable].index:
        t1_disc = result.loc[row_label, 'tier_1_disc_pct']
        qty_ratio = result.loc[row_label, 'tier_2_qty'] / result.loc[row_label, 'tier_1_qty']
        disc_ratio = result.loc[row_label, 'tier_2_disc_pct'] / t1_disc

        # Constraint satisfied (qty_ratio < disc_ratio): nothing to do
        if not (disc_ratio <= qty_ratio):
            continue

        # Raise the T2 discount to the minimum acceptable ratio
        required_ratio = qty_ratio * MIN_RATIO
        raised_disc = t1_disc * required_ratio

        if raised_disc <= MAX_DISCOUNT_PCT:
            result.loc[row_label, 'tier_2_disc_pct'] = round(raised_disc, 2)
            # Price uses the unrounded discount, as the discount column is rounded separately
            result.loc[row_label, 'tier_2_price'] = round(
                result.loc[row_label, 'packing_unit_price'] * (1 - raised_disc / 100), 2
            )
        else:
            # Cannot satisfy the constraint within the discount cap: drop T2
            result.loc[row_label, t2_fields] = np.nan
            invalidated += 1

    if invalidated > 0:
        print(f"    Invalidated T2 for {invalidated} SKUs (elasticity violation)")

    return result


def apply_tier_flags(df: pd.DataFrame) -> pd.DataFrame:
    """
    Apply the keep_qd_tiers filter and calculate per-tier validity flags.

    A tier is flagged valid (1) when its label is listed in the row's
    ``keep_qd_tiers`` (an empty or missing list means all of T1/T2/T3), its
    quantity and discount are non-null, and its discount is positive.
    Tiers flagged 0 have their qty / price / discount columns set to NaN.

    The flags are assigned as columns rather than concatenated. This keeps
    the function working on an empty DataFrame (where a row-wise ``apply``
    returns a copy of the frame instead of the flag columns) and avoids
    duplicate ``t*_f`` columns when the input already carries them.

    Args:
        df: DataFrame with ``keep_qd_tiers`` and the tier qty / price /
            discount columns. The input is not modified.

    Returns:
        Copy of ``df`` with ``t1_f``, ``t2_f``, ``t3_f`` (0/1) and ``all_f``
        (number of valid tiers) added, and invalid tiers nulled.
    """
    df = df.copy()

    # (flag column, label in keep_qd_tiers, qty column, price column, discount column)
    tier_specs = (
        ('t1_f', 'T1', 'tier_1_qty', 'tier_1_price', 'tier_1_disc_pct'),
        ('t2_f', 'T2', 'tier_2_qty', 'tier_2_price', 'tier_2_disc_pct'),
        ('t3_f', 'T3', 'ws_qty', 'ws_price', 'ws_discount_pct'),
    )

    def get_tier_flags(row):
        keep_tiers = parse_keep_qd_tiers(row.get('keep_qd_tiers'))
        if not keep_tiers:
            keep_tiers = ['T1', 'T2', 'T3']

        flags = {}
        for flag_col, label, qty_col, _price_col, disc_col in tier_specs:
            disc = row.get(disc_col)
            # Short-circuits so the numeric comparison only runs on non-null discounts
            is_valid = (label in keep_tiers and
                        pd.notna(row.get(qty_col)) and
                        pd.notna(disc) and
                        disc > 0)
            flags[flag_col] = int(is_valid)
        return flags

    row_flags = [get_tier_flags(row) for _, row in df.iterrows()]

    for flag_col, _label, _qty_col, _price_col, _disc_col in tier_specs:
        # Positional assignment against df.index; overwrites any existing flag column
        df[flag_col] = pd.Series(
            [flags[flag_col] for flags in row_flags], index=df.index, dtype='int64'
        )

    # Set invalid tier values to null
    for flag_col, _label, qty_col, price_col, disc_col in tier_specs:
        df.loc[df[flag_col] == 0, [qty_col, price_col, disc_col]] = np.nan

    df['all_f'] = df['t1_f'] + df['t2_f'] + df['t3_f']

    return df


def priority_selection_v2(df: pd.DataFrame, limit: int = TOP_TIERS_PER_WAREHOUSE) -> pd.DataFrame:
    """
    V2 priority-based selection of SKUs within a per-warehouse tier budget.

    1. High DOH SKUs come first: DOH (``responsive_doh``, else ``doh``) above
       HIGH_DOH_THRESHOLD AND inventory_value >= HIGH_DOH_INVENTORY_THRESHOLD
    2. Within each group, SKUs are ordered by mtd_qty * effective_price
    3. Rows are kept while the running sum of ``all_f`` (valid tier count)
       within the warehouse stays <= ``limit``

    ``inventory_value`` is derived as stocks * effective_price when absent
    (0 if those columns are missing). The returned copy carries the helper
    columns ``is_high_doh``, ``ranking_score`` and ``cumsum``.
    """
    ranked = df.copy()

    print(f"\n  Applying V2 priority selection (High DOH first, {limit} total)...")

    # Calculate inventory value if not present
    if 'inventory_value' not in ranked.columns:
        derivable = 'stocks' in ranked.columns and 'effective_price' in ranked.columns
        if derivable:
            ranked['inventory_value'] = ranked['stocks'] * ranked['effective_price']
        else:
            ranked['inventory_value'] = 0

    # Mark High DOH priority
    doh_values = ranked.get('responsive_doh', ranked.get('doh', 0))
    slow_moving = doh_values > HIGH_DOH_THRESHOLD
    high_value = ranked['inventory_value'] >= HIGH_DOH_INVENTORY_THRESHOLD
    ranked['is_high_doh'] = (slow_moving & high_value).astype(int)

    high_doh_count = ranked['is_high_doh'].sum()
    print(f"    High DOH SKUs (DOH>{HIGH_DOH_THRESHOLD}, inv>={HIGH_DOH_INVENTORY_THRESHOLD}): {high_doh_count}")

    # Calculate ranking score (missing month-to-date quantity counts as zero)
    ranked['mtd_qty'] = ranked['mtd_qty'].fillna(0)
    ranked['ranking_score'] = ranked['mtd_qty'] * ranked['effective_price']

    # Sort: per warehouse, High DOH first, then by descending ranking score
    sort_keys = ['warehouse_id', 'is_high_doh', 'ranking_score']
    ranked = ranked.sort_values(sort_keys, ascending=[True, False, False])

    # Running tier count per warehouse; keep rows within the budget
    ranked['cumsum'] = ranked.groupby('warehouse_id')['all_f'].cumsum()
    within_budget = ranked['cumsum'] <= limit
    df_selected = ranked[within_budget].copy()

    # Summary
    high_doh_selected = df_selected['is_high_doh'].sum()
    print(f"    Selected: {len(df_selected)} SKUs ({df_selected['all_f'].sum()} tiers)")
    print(f"    High DOH selected: {high_doh_selected}")
    print(f"    Regular selected: {len(df_selected) - high_doh_selected}")

    return df_selected

# Progress marker: printed once the helper functions in this cell have been defined.
print("✓ Helper functions defined")


In [None]:
# =============================================================================
# MAIN FUNCTION: process_qd_v2
# =============================================================================

def process_qd_v2(df_qd: pd.DataFrame, dry_run: bool = True) -> dict:
    """
    V2 Main function to process Quantity Discounts.
    
    V2 IMPROVEMENTS:
    - Priority-based selection (High DOH first)
    - Sequential performance adjustment (qty first, then discount)
    - Tier-specific conversion thresholds
    - Margin-based fallbacks
    - Elasticity validation (T1/T2 only)
    
    Pipeline (in execution order):
        1.  Deactivate all currently active QDs (deactivate_active_qd)
        2.  Attach top-selling packing units (inner merge on product/warehouse)
        3.  Attach warehouse ticket statistics
        4.  Attach tier 1 / tier 2 quantities
        5.  Compute T1 & T2 prices (calculate_tier_prices_v2, row-wise)
        6.  Compute T3 wholesale prices (calculate_wholesale_tier_v2, row-wise)
        7.  Derive T1/T2 discount percentages from packing-unit price
        8.  Apply performance adjustments and recompute prices
        9.  Validate tier ordering and T1/T2 elasticity
        10. Apply tier flags, keep SKUs with all_f >= 2
        11. Priority selection (priority_selection_v2)
        12. Build per-SKU QD config dicts
        13. Send the review sheet to Slack
        14. Create QDs via bulk_create_qd
        15. Prepare and upload cart rules
    
    Args:
        df_qd: DataFrame with columns from Module 3
        dry_run: If True, only log what would be done (default: True).
            The flag is forwarded to deactivate_active_qd, bulk_create_qd and
            upload_cart_rules; the Slack review file in Step 13 is sent
            regardless of this flag.
        
    Returns:
        dict with processing results. The two early returns (empty input,
        no packing units) contain only 'mode', 'total_input', 'processed',
        'failed', 'deactivate_result' and 'create_result'. The full return
        additionally contains 'total_tiers', 'high_doh_count',
        'cart_rules_result', 'cart_rules_update', 'qd_configs', 'df_work'
        (the selected frame, i.e. the output of Step 11) and 'review_file'.
    """
    print("\n" + "="*70)
    print("QD HANDLER V2: PROCESSING QUANTITY DISCOUNTS")
    print("="*70)
    print(f"Mode: {'DRY RUN (testing)' if dry_run else 'LIVE'}")
    print(f"Timestamp: {CAIRO_NOW.strftime('%Y-%m-%d %H:%M')} Cairo Time")
    print(f"Input SKUs: {len(df_qd)}")
    
    # Empty input: return before anything is deactivated or created.
    if len(df_qd) == 0:
        print("\nNo SKUs to process. Exiting.")
        return {
            'mode': 'testing' if dry_run else 'live',
            'total_input': 0,
            'processed': 0,
            'failed': 0,
            'deactivate_result': {'total_active': 0, 'deactivated': [], 'failed': []},
            'create_result': {'created_count': 0, 'failed_count': 0, 'errors': []}
        }
    
    print(f"\nUnique warehouses: {df_qd['warehouse_id'].nunique()}")
    
    # =========================================================================
    # STEP 1: DEACTIVATE ALL EXISTING QUANTITY DISCOUNTS
    # =========================================================================
    # NOTE(review): deactivation runs before any of the new QDs are computed
    # or validated. If a later step returns early (Step 2) or raises, the
    # previously active QDs have already been deactivated in LIVE mode.
    print("\n" + "-"*60)
    print("STEP 1: Deactivating existing Quantity Discounts...")
    print("-"*60)
    
    deactivate_result = deactivate_active_qd(dry_run=dry_run)
    
    # =========================================================================
    # STEP 2: GET PACKING UNITS
    # =========================================================================
    print("\n" + "-"*60)
    print("STEP 2: Getting top-selling packing units...")
    print("-"*60)
    
    product_warehouse_list = df_qd[['product_id', 'warehouse_id']].drop_duplicates().values.tolist()
    df_packing_units = get_top_selling_packing_units(product_warehouse_list)
    
    # Without packing units nothing can be priced: report every input SKU as failed.
    if len(df_packing_units) == 0:
        print("  No packing units found!")
        return {
            'mode': 'testing' if dry_run else 'live',
            'total_input': len(df_qd),
            'processed': 0,
            'failed': len(df_qd),
            'deactivate_result': deactivate_result,
            'create_result': {'created_count': 0, 'failed_count': 0, 'errors': [{'error': 'No packing units found'}]}
        }
    
    # Inner merge: SKUs without a matching packing unit are dropped here.
    df_work = df_qd.merge(df_packing_units, on=['product_id', 'warehouse_id'], how='inner')
    print(f"  Matched {len(df_work)} SKUs with packing units")
    
    # Prefer new_price; fall back to current_price where new_price is missing.
    df_work['effective_price'] = df_work['new_price'].fillna(df_work['current_price'])
    # Scale per-basic-unit cost and price up to the packing unit.
    # (basic_unit_count is presumably supplied by df_packing_units - confirm.)
    df_work['wac_pu'] = df_work['wac_p'] * df_work['basic_unit_count']
    df_work['packing_unit_price'] = df_work['effective_price'] * df_work['basic_unit_count']
    
    # =========================================================================
    # STEP 3: GET WAREHOUSE TICKET STATISTICS
    # =========================================================================
    print("\n" + "-"*60)
    print("STEP 3: Getting warehouse ticket statistics...")
    print("-"*60)
    
    df_warehouse_stats = get_warehouse_ticket_stats()
    
    # NOTE(review): the hard-coded defaults (4000 / 15) are applied only when
    # the stats frame is completely empty. With a non-empty frame, the left
    # merge leaves NaN for any warehouse that has no stats row.
    if len(df_warehouse_stats) > 0:
        df_work = df_work.merge(
            df_warehouse_stats[['warehouse_id', 'avg_ticket_size', 'orders_per_car_by_weight']],
            on='warehouse_id', how='left'
        )
    else:
        df_work['avg_ticket_size'] = 4000
        df_work['orders_per_car_by_weight'] = 15
    
    # =========================================================================
    # STEP 4: CALCULATE TIER QUANTITIES
    # =========================================================================
    print("\n" + "-"*60)
    print("STEP 4: Calculating tier quantities...")
    print("-"*60)
    
    product_warehouse_pu_list = df_work[['warehouse_id', 'product_id', 'packing_unit_id']].drop_duplicates().values.tolist()
    df_tier_qty = get_tier_quantities(product_warehouse_pu_list)
    
    # NOTE(review): when df_tier_qty is empty no merge happens, so
    # tier_1_qty / tier_2_qty are not added by this step. Later steps read
    # those columns - confirm they are guaranteed to exist in that case.
    if len(df_tier_qty) > 0:
        df_work = df_work.merge(
            df_tier_qty[['warehouse_id', 'product_id', 'packing_unit_id', 'tier_1_qty', 'tier_2_qty']],
            on=['warehouse_id', 'product_id', 'packing_unit_id'], how='left'
        )
        print(f"  {df_work['tier_1_qty'].notna().sum()} SKUs have tier quantities")
    
    # =========================================================================
    # STEP 5: CALCULATE T1 & T2 PRICES (V2 with fallback)
    # =========================================================================
    print("\n" + "-"*60)
    print("STEP 5: Calculating T1 & T2 prices (V2 with fallback)...")
    print("-"*60)
    
    # Row-wise pricing; the resulting columns are appended to df_work.
    price_results = df_work.apply(calculate_tier_prices_v2, axis=1)
    df_work = pd.concat([df_work, price_results], axis=1)
    
    # Snapshot of price validity at this point; reused in the final summary.
    valid_t1_t2 = df_work['tier_1_price'].notna() & df_work['tier_2_price'].notna()
    print(f"  Valid T1 & T2 prices: {valid_t1_t2.sum()} / {len(df_work)}")
    
    if 'price_source' in df_work.columns:
        print("\n  Price source distribution:")
        for source, count in df_work['price_source'].value_counts().head(5).items():
            print(f"    {source}: {count}")
    
    # =========================================================================
    # STEP 6: CALCULATE T3 (WHOLESALE) PRICES (V2 with 2x min)
    # =========================================================================
    print("\n" + "-"*60)
    print("STEP 6: Calculating T3 (wholesale) prices (V2: 2x min)...")
    print("-"*60)
    
    ws_results = df_work.apply(calculate_wholesale_tier_v2, axis=1)
    df_work = pd.concat([df_work, ws_results], axis=1)
    
    # Snapshot of T3 validity at this point; reused in the final summary.
    valid_t3 = df_work['ws_price'].notna()
    print(f"  Valid T3 prices: {valid_t3.sum()} / {len(df_work)}")
    
    # =========================================================================
    # STEP 7: CALCULATE DISCOUNT PERCENTAGES
    # =========================================================================
    print("\n" + "-"*60)
    print("STEP 7: Calculating discount percentages...")
    print("-"*60)
    
    # Discount % = (list price - tier price) / list price * 100, rounded to 2 dp.
    df_work['tier_1_disc_pct'] = ((df_work['packing_unit_price'] - df_work['tier_1_price']) / 
                                  df_work['packing_unit_price'] * 100).round(2)
    df_work['tier_2_disc_pct'] = ((df_work['packing_unit_price'] - df_work['tier_2_price']) / 
                                  df_work['packing_unit_price'] * 100).round(2)
    
    # =========================================================================
    # STEP 8: V2 PERFORMANCE ADJUSTMENTS
    # =========================================================================
    print("\n" + "-"*60)
    print("STEP 8: Applying V2 performance adjustments...")
    print("-"*60)
    
    # Default missing performance inputs to 0 so the adjustment step can run
    # on inputs that do not carry them.
    for col in ['t1_cntrb_uth', 't2_cntrb_uth', 't3_cntrb_uth', 'has_active_qd']:
        if col not in df_work.columns:
            df_work[col] = 0
    
    df_work = apply_performance_adjustments(df_work)
    df_work = recalculate_prices_from_discounts(df_work)
    
    # =========================================================================
    # STEP 9: VALIDATE TIER ORDERING AND ELASTICITY
    # =========================================================================
    print("\n" + "-"*60)
    print("STEP 9: Validating tier ordering and elasticity...")
    print("-"*60)
    
    df_work = validate_tier_ordering(df_work)
    df_work = validate_elasticity_t1_t2(df_work)
    
    # =========================================================================
    # STEP 10: APPLY TIER FLAGS AND FILTER
    # =========================================================================
    print("\n" + "-"*60)
    print("STEP 10: Applying tier flags and filtering...")
    print("-"*60)
    
    df_work = apply_tier_flags(df_work)
    
    # Only keep SKUs with at least 2 valid tiers
    # (all_f is read as the per-SKU count of valid tiers; it is summed as
    # "tier entries" below.)
    df_work = df_work[df_work['all_f'] >= 2].copy()
    
    print(f"  SKUs with valid tiers after filtering: {len(df_work)}")
    print(f"  Total tier entries: {df_work['all_f'].sum()}")
    print(f"    T1 valid: {df_work['t1_f'].sum()}")
    print(f"    T2 valid: {df_work['t2_f'].sum()}")
    print(f"    T3 valid: {df_work['t3_f'].sum()}")
    
    # =========================================================================
    # STEP 11: V2 PRIORITY SELECTION (High DOH first)
    # =========================================================================
    print("\n" + "-"*60)
    print("STEP 11: V2 Priority selection (High DOH first)...")
    print("-"*60)
    
    # df_top is the final selection; every later step works on it.
    df_top = priority_selection_v2(df_work)
    
    print(f"\n  Tier entries per warehouse:")
    for wh in df_top['warehouse_id'].unique():
        wh_data = df_top[df_top['warehouse_id'] == wh]
        high_doh = wh_data['is_high_doh'].sum()
        print(f"    WH {wh}: {len(wh_data)} SKUs, {wh_data['all_f'].sum()} tiers (High DOH: {high_doh})")
    
    # =========================================================================
    # STEP 12: BUILD QD CONFIGURATIONS
    # =========================================================================
    print("\n" + "-"*60)
    print("STEP 12: Building QD configurations...")
    print("-"*60)
    
    # One config dict per selected SKU; a tier is included only when its flag is 1.
    qd_configs = []
    
    for _, row in df_top.iterrows():
        tiers = []
        
        if row['t1_f'] == 1:
            tiers.append({
                "tier": 1,
                "quantity": int(row['tier_1_qty']),
                "discount_pct": float(row['tier_1_disc_pct'])
            })
        
        if row['t2_f'] == 1:
            tiers.append({
                "tier": 2,
                "quantity": int(row['tier_2_qty']),
                "discount_pct": float(row['tier_2_disc_pct'])
            })
        
        if row['t3_f'] == 1:
            tiers.append({
                "tier": 3,
                "quantity": int(row['ws_qty']),
                "discount_pct": float(row['ws_discount_pct'])
            })
        
        # NOTE(review): int(...) raises on NaN; the .get default of 0 for
        # cohort_id only covers a missing column, not a missing value.
        qd_configs.append({
            'product_id': int(row['product_id']),
            'warehouse_id': int(row['warehouse_id']),
            'cohort_id': int(row.get('cohort_id', 0)),
            'packing_unit_id': int(row['packing_unit_id']),
            'tiers': tiers,
            'sku': row.get('sku', 'N/A'),
            'is_high_doh': int(row.get('is_high_doh', 0)),
            'ranking_score': row.get('ranking_score', 0)
        })
    
    print(f"  Valid QD configs: {len(qd_configs)}")
    
    # Count how many configs carry each tier (used here and in the summary).
    tier_counts = {1: 0, 2: 0, 3: 0}
    for config in qd_configs:
        for t in config['tiers']:
            tier_counts[t['tier']] += 1
    print(f"\n  Tier distribution:")
    print(f"    T1: {tier_counts[1]}, T2: {tier_counts[2]}, T3: {tier_counts[3]}")
    
    # =========================================================================
    # STEP 13: SAVE REVIEW FILE
    # =========================================================================
    print("\n" + "-"*60)
    print("STEP 13: Saving review file...")
    print("-"*60)
    
    review_columns = [
        'warehouse_id', 'product_id', 'packing_unit_id', 'sku', 'brand', 'cat',
        'effective_price', 'packing_unit_price', 'wac_p', 'wac_pu',
        'tier_1_qty', 'tier_1_price', 'tier_1_disc_pct', 't1_f', 't1_action',
        'tier_2_qty', 'tier_2_price', 'tier_2_disc_pct', 't2_f', 't2_action',
        'ws_qty', 'ws_price', 'ws_discount_pct', 't3_f', 't3_action',
        'all_f', 'is_high_doh', 'ranking_score', 'price_source'
    ]
    # Keep only the review columns that actually exist in the selection.
    review_columns = [c for c in review_columns if c in df_top.columns]
    df_review = df_top[review_columns].copy()
    
    # The review sheet is sent to Slack in both dry-run and live mode
    # (there is no dry_run check around this call).
    SLACK_CHANNEL_ID = 'C0AAWK97Z3Q'
    review_file_name = f'QD_V2_review_{CAIRO_NOW.strftime("%Y%m%d_%H%M")}.xlsx'
    send_file_slack(
        df_review, 
        f'QD V2 Review: {len(df_review)} SKUs ready for processing', 
        SLACK_CHANNEL_ID,
        filename=review_file_name
    )
    print(f"  Sent review file to Slack ({len(df_review)} rows)")
    
    # =========================================================================
    # STEP 14: CREATE NEW QUANTITY DISCOUNTS
    # =========================================================================
    print("\n" + "-"*60)
    print("STEP 14: Creating new Quantity Discounts...")
    print("-"*60)
    
    if len(qd_configs) == 0:
        print("  No Quantity Discounts to create.")
        create_result = {"success": True, "created_count": 0, "failed_count": 0, "errors": []}
    else:
        print(f"  Creating {len(qd_configs)} Quantity Discounts...")
        create_result = bulk_create_qd(qd_configs, df_top, dry_run=dry_run)
        print(f"\n  Creation Result:")
        print(f"    Created: {create_result['created_count']}")
        print(f"    Failed: {create_result['failed_count']}")
    
    # =========================================================================
    # STEP 15: UPDATE CART RULES
    # =========================================================================
    print("\n" + "-"*60)
    print("STEP 15: Updating cart rules...")
    print("-"*60)
    
    # df_qd is passed as the source of current/new cart-rule values.
    cart_rules_update = prepare_cart_rules_update(df_top, df_qd)
    
    if len(cart_rules_update) == 0:
        print("  No cart rules need updating.")
        cart_rules_result = {'success': [], 'failed': []}
    else:
        print(f"  Uploading cart rules...")
        cart_rules_result = upload_cart_rules(cart_rules_update, dry_run=dry_run)
        print(f"\n  Cart Rules Result:")
        print(f"    Cohorts updated: {len(cart_rules_result['success'])}")
        print(f"    Cohorts failed: {len(cart_rules_result['failed'])}")
    
    # =========================================================================
    # SUMMARY
    # =========================================================================
    total_tiers = sum(tier_counts.values())
    
    print("\n" + "="*70)
    print("QD HANDLER V2 - SUMMARY")
    print("="*70)
    print(f"Mode: {'DRY RUN (testing)' if dry_run else 'LIVE'}")
    print(f"Total SKUs in input: {len(df_qd)}")
    # valid_t1_t2 / valid_t3 are the Step 5 / Step 6 snapshots, i.e. counts
    # taken before the performance adjustments and validation steps.
    print(f"SKUs with valid T1 & T2 prices: {valid_t1_t2.sum()}")
    print(f"SKUs with valid T3 prices: {valid_t3.sum()}")
    print(f"SKUs after filtering & selection: {len(df_top)}")
    print(f"  High DOH: {df_top['is_high_doh'].sum()}")
    print(f"  Regular: {len(df_top) - df_top['is_high_doh'].sum()}")
    print(f"Total tier entries: {total_tiers}")
    print(f"QD found active: {deactivate_result['total_active']}")
    print(f"QD deactivated: {len(deactivate_result['deactivated'])}")
    print(f"QD created: {create_result['created_count']}")
    # This prints the number of rows prepared for update, not the number
    # of rows confirmed uploaded.
    print(f"Cart rules updated: {len(cart_rules_update)} products")
    print("="*70)
    
    return {
        'mode': 'testing' if dry_run else 'live',
        'total_input': len(df_qd),
        'processed': create_result['created_count'],
        'failed': create_result['failed_count'],
        'total_tiers': total_tiers,
        'high_doh_count': int(df_top['is_high_doh'].sum()),
        'deactivate_result': deactivate_result,
        'create_result': create_result,
        'cart_rules_result': cart_rules_result,
        'cart_rules_update': cart_rules_update,
        'qd_configs': qd_configs,
        'df_work': df_top,
        'review_file': review_file_name
    }

print("✓ process_qd_v2() function defined")


In [None]:
# =============================================================================
# API FUNCTIONS
# =============================================================================

def deactivate_active_qd(dry_run: bool = True) -> dict:
    """Deactivate ALL active Quantity Discounts.

    Active discounts are fetched with ``get_active_qd_now()`` and each one is
    switched off with a PUT to ``{QD_API_URL}{discount_id}/activation``.
    Failures are collected per discount; one failing request never aborts
    the remaining ones.

    Args:
        dry_run: If True, no token is fetched and no HTTP request is sent;
            every discount id is only logged and reported as deactivated.

    Returns:
        dict with keys:
            'success'      - True when no deactivation failed
            'deactivated'  - list of discount ids deactivated (or that would be)
            'failed'       - list of {'id', 'error'} dicts
            'total_active' - number of active discounts found
    """
    print("\n" + "="*60)
    print("DEACTIVATING ACTIVE QUANTITY DISCOUNTS")
    print("="*60)
    print(f"Mode: {'DRY RUN' if dry_run else 'LIVE'}")

    print("\nStep 1: Querying active Quantity Discounts from Snowflake...")
    df_active = get_active_qd_now()

    if len(df_active) == 0:
        print("  No active Quantity Discounts found.")
        return {'success': True, 'deactivated': [], 'failed': [], 'total_active': 0}

    discount_ids = df_active['discount_id'].tolist()
    total = len(discount_ids)
    print(f"  Found {total} active Quantity Discounts")

    print(f"\nStep 2: Deactivating {total} discounts...")

    results = {'deactivated': [], 'failed': []}

    # Without a timeout a single stalled connection blocks the whole run
    # forever. With one, the stall surfaces as an exception that the handler
    # below records as a per-discount failure.
    request_timeout = 30  # seconds

    headers = None
    if not dry_run:
        auth_token = _get_api_token()
        headers = {
            'Authorization': f'Bearer {auth_token}',
            'Content-Type': 'application/json'
        }

    for idx, discount_id in enumerate(discount_ids):
        if dry_run:
            print(f"  [{idx+1}/{total}] [DRY RUN] Would deactivate: {discount_id}")
            results['deactivated'].append(discount_id)
            continue

        url = f"{QD_API_URL}{discount_id}/activation?status=false"

        try:
            response = requests.put(
                url,
                headers=headers,
                json={'status': False},
                timeout=request_timeout,
            )

            if response.status_code in [200, 204]:
                print(f"  [{idx+1}/{total}] [OK] Deactivated: {discount_id}")
                results['deactivated'].append(discount_id)
            else:
                print(f"  [{idx+1}/{total}] [ERROR] {discount_id}: {response.status_code}")
                results['failed'].append({'id': discount_id, 'error': f"{response.status_code}"})
        except Exception as e:
            # Deliberately broad: deactivation is best-effort per discount,
            # so any error is recorded and the loop moves on.
            print(f"  [{idx+1}/{total}] [EXCEPTION] {discount_id}: {e}")
            results['failed'].append({'id': discount_id, 'error': str(e)})

        # Pause between live requests to avoid hammering the API.
        time.sleep(0.5)

    print(f"\n{'='*60}")
    print("DEACTIVATION SUMMARY")
    print(f"{'='*60}")
    print(f"Total active found: {total}")
    print(f"Successfully deactivated: {len(results['deactivated'])}")
    print(f"Failed: {len(results['failed'])}")

    return {
        'success': len(results['failed']) == 0,
        'deactivated': results['deactivated'],
        'failed': results['failed'],
        'total_active': total
    }


def create_upload_format(df_configs: pd.DataFrame) -> pd.DataFrame:
    """Create upload format DataFrame from QD configurations.

    Produces one output row per warehouse. Each tier entry is a list
    ``[product_id, packing_unit_id, quantity, discount_pct]`` with the
    discount rounded to 2 decimals and capped at the tier's maximum.
    Tier 1 and wholesale (T3) entries go to 'Discounts Group 1'; tier 2
    entries go to 'Discounts Group 2'. Group 1 entries beyond MAX_GROUP_SIZE
    spill over to the end of Group 2.

    Args:
        df_configs: Frame with 'warehouse_id', 'product_id',
            'packing_unit_id' plus the tier flag / quantity / discount
            columns. A tier is emitted only when its flag equals 1 and both
            its quantity and discount are non-null.

    Returns:
        DataFrame with columns 'warehouse_id', 'Discounts Group 1',
        'Discounts Group 2', 'Description' (empty when there are no rows).
    """
    columns = ['warehouse_id', 'Discounts Group 1', 'Discounts Group 2', 'Description']

    # Rows are collected in a list and the frame is built once at the end.
    # Appending with pd.concat inside the loop re-copies the frame on every
    # iteration and concatenates onto an empty frame, which newer pandas
    # versions warn about.
    rows = []

    for wh_id in df_configs['warehouse_id'].unique():
        warehouse_data = df_configs[df_configs['warehouse_id'] == wh_id]
        warehouse_id = int(wh_id)

        tier_1_items = []
        tier_2_items = []
        ws_items = []

        for _, r in warehouse_data.iterrows():
            product_id = int(r['product_id'])
            packing_unit_id = int(r['packing_unit_id'])

            if r.get('t1_f', 0) == 1 and pd.notna(r.get('tier_1_qty')) and pd.notna(r.get('tier_1_disc_pct')):
                q_1 = int(r['tier_1_qty'])
                d_1 = min(round(r['tier_1_disc_pct'], 2), MAX_DISCOUNT_CAP_T1)
                tier_1_items.append([product_id, packing_unit_id, q_1, d_1])

            if r.get('t2_f', 0) == 1 and pd.notna(r.get('tier_2_qty')) and pd.notna(r.get('tier_2_disc_pct')):
                q_2 = int(r['tier_2_qty'])
                d_2 = min(round(r['tier_2_disc_pct'], 2), MAX_DISCOUNT_CAP_T2)
                tier_2_items.append([product_id, packing_unit_id, q_2, d_2])

            if r.get('t3_f', 0) == 1 and pd.notna(r.get('ws_qty')) and pd.notna(r.get('ws_discount_pct')):
                q_ws = int(r['ws_qty'])
                d_ws = min(round(r['ws_discount_pct'], 2), MAX_DISCOUNT_CAP_WS)
                ws_items.append([product_id, packing_unit_id, q_ws, d_ws])

        group_1_items = tier_1_items + ws_items

        # Group 1 is capped; whatever does not fit is appended to Group 2.
        # NOTE(review): Group 2 itself is not capped here - confirm the API
        # accepts a Group 2 larger than MAX_GROUP_SIZE.
        if len(group_1_items) > MAX_GROUP_SIZE:
            overflow = group_1_items[MAX_GROUP_SIZE:]
            group_1_items = group_1_items[:MAX_GROUP_SIZE]
            group_2_items = tier_2_items + overflow
        else:
            group_2_items = tier_2_items

        rows.append({
            'warehouse_id': warehouse_id,
            'Discounts Group 1': group_1_items,
            'Discounts Group 2': group_2_items,
            'Description': f'{warehouse_id}QD'
        })

    return pd.DataFrame(rows, columns=columns)


def prepare_upload_file(df_upload: pd.DataFrame, dry_run: bool = True) -> tuple:
    """Build the final per-warehouse upload sheet and its target filename.

    Each warehouse row is joined to WAREHOUSE_TAG_MAPPING to obtain its tag
    id and name, and gets a description derived from the warehouse name
    (spaces and dashes removed, suffixed with "QD"). The validity window
    starts 10 minutes from now (Cairo time) and ends QD_DURATION_HOURS
    hours from now. Warehouses without a tag id are dropped.

    Args:
        df_upload: Frame with 'warehouse_id', 'Discounts Group 1' and
            'Discounts Group 2'.
        dry_run: When True the Excel file is NOT written; the filename is
            still returned.

    Returns:
        (to_upload, filename) - the upload frame and the Excel path.
    """
    # Flatten the warehouse -> {name, tag_id} mapping into a joinable frame.
    mapping_records = []
    for wh_id, info in WAREHOUSE_TAG_MAPPING.items():
        mapping_records.append({
            'warehouse_id': wh_id,
            'warehouse_name': info['name'],
            'tag_id': info['tag_id'],
        })
    df_mapping = pd.DataFrame(mapping_records)

    merged = df_upload.merge(df_mapping, on='warehouse_id', how='left')

    # Description = warehouse name without spaces/dashes + "QD".
    compact_names = merged['warehouse_name'].astype(str).str.replace(' ', '').str.replace('-', '')
    merged['Description'] = compact_names + "QD"

    # Validity window relative to the current Cairo time.
    cairo_now = datetime.now(CAIRO_TZ)
    window_start = cairo_now + timedelta(minutes=10)
    window_end = cairo_now + timedelta(hours=QD_DURATION_HOURS)
    date_format = '%d/%m/%Y %H:%M'

    merged = merged.rename(columns={'tag_id': 'Tag ID'})
    merged['Start Date/Time'] = window_start.strftime(date_format)
    merged['End Date/Time'] = window_end.strftime(date_format)

    output_columns = [
        'Tag ID', 'Description', 'Start Date/Time', 'End Date/Time',
        'Discounts Group 1', 'Discounts Group 2',
    ]
    to_upload = merged[output_columns]
    # Warehouses missing from the mapping have no tag id and cannot be uploaded.
    to_upload = to_upload[to_upload['Tag ID'].notna()]

    filename = f'{QD_OUTPUT_DIR}/QD_V2_upload_{cairo_now.strftime("%Y%m%d_%H%M")}.xlsx'

    if not dry_run:
        to_upload.to_excel(filename, index=False)
        print(f"  Saved upload file: {filename} ({len(to_upload)} warehouses)")

    return to_upload, filename


def post_QD(filename: str) -> requests.Response:
    """Upload Quantity Discount file to MaxAB API.

    Fetches an access token, then POSTs the Excel file at ``filename`` as a
    multipart form field named 'file'.

    Args:
        filename: Path of the .xlsx file to upload.

    Returns:
        The ``requests.Response`` of the upload call. The HTTP status is not
        checked here; callers inspect ``response.ok`` / ``status_code``.
    """
    token = get_access_token(
        'https://sso.maxab.info/auth/realms/maxab/protocol/openid-connect/token',
        'main-system-externals',
        API_SECRET
    )

    url = "https://api.maxab.info/commerce/api/admins/v1/quantity-discounts"
    headers = {'Authorization': f'bearer {token}'}

    # Open the file in a context manager so the handle is always closed,
    # including when the request raises. The response body is fully read
    # before the block exits (no streaming), so closing here is safe.
    with open(filename, 'rb') as upload_fh:
        files = [
            ('file', (filename, upload_fh,
                      'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'))
        ]
        # NOTE(review): no timeout is set on this upload; consider adding one
        # once the expected server-side processing time is known.
        response = requests.request("POST", url, headers=headers, data={}, files=files)

    return response


def bulk_create_qd(qd_configs: list, df_work: pd.DataFrame, dry_run: bool = True) -> dict:
    """Create Quantity Discounts in bulk through a single file upload.

    The selected SKUs in ``df_work`` are converted to the per-warehouse
    upload layout, turned into the final upload sheet, and (in live mode)
    posted to the API. ``qd_configs`` is used only for the reported counts.

    Args:
        qd_configs: List of QD config dicts (one per SKU).
        df_work: Frame of selected SKUs with tier columns.
        dry_run: When True nothing is uploaded and success is reported.

    Returns:
        dict with 'success', 'created_count', 'failed_count', 'errors' and
        'upload_df' (the sheet that was / would be uploaded).
    """
    config_count = len(qd_configs)

    def _summary(ok, created, failed, errors, upload_df):
        # Single place that defines the shape of the result dict.
        return {
            "success": ok,
            "created_count": created,
            "failed_count": failed,
            "errors": errors,
            "upload_df": upload_df,
        }

    print("\n  Creating upload format...")
    df_upload = create_upload_format(df_work)
    print(f"  Upload format created: {len(df_upload)} warehouse rows")

    print(f"\n  Per warehouse breakdown:")
    per_warehouse = zip(
        df_upload['warehouse_id'],
        df_upload['Discounts Group 1'],
        df_upload['Discounts Group 2'],
    )
    for wh, group_1, group_2 in per_warehouse:
        print(f"    WH {wh}: Group 1 = {len(group_1)} items, Group 2 = {len(group_2)} items")

    print("\n  Preparing upload file...")
    to_upload, filename = prepare_upload_file(df_upload, dry_run=dry_run)

    # Dry run: report what would happen without touching the API.
    if dry_run:
        print(f"\n  [DRY RUN] Would upload {len(to_upload)} warehouses")
        return _summary(True, config_count, 0, [], to_upload)

    print(f"\n  Uploading QD file to API...")
    response = post_QD(filename)

    # The upload is all-or-nothing: any non-OK status counts every config as failed.
    if not response.ok:
        print(f"  Upload failed (status: {response.status_code})")
        print(f"  Response: {response.content[:500]}")
        failure = [{"error": f"API upload failed: {response.status_code}"}]
        return _summary(False, 0, config_count, failure, to_upload)

    print(f"  Upload succeeded (status: {response.status_code})")
    return _summary(True, config_count, 0, [], to_upload)


def prepare_cart_rules_update(df_work: pd.DataFrame, df_qd_input: pd.DataFrame) -> pd.DataFrame:
    """Prepare cart rules update based on QD tier quantities.

    A product needs a cart-rule update when its largest tier quantity
    exceeds its effective cart rule. The effective rule is 'new_cart_rule',
    falling back to 'current_cart_rule', falling back to 0.

    Args:
        df_work: Selected SKUs; must contain 'product_id', 'warehouse_id',
            'cohort_id', 'packing_unit_id' and any of 'tier_1_qty',
            'tier_2_qty', 'ws_qty'.
        df_qd_input: Original input frame; source for 'current_cart_rule' /
            'new_cart_rule' when ``df_work`` does not already carry them.

    Returns:
        DataFrame with columns 'cohort_id', 'product_id', 'packing_unit_id',
        'new_cart_rule' (integer), one row per key, holding the largest
        required rule.
    """
    key_cols = ['product_id', 'warehouse_id']
    rule_cols = ['current_cart_rule', 'new_cart_rule']

    # Bring in only the cart-rule columns that df_work is missing. Merging a
    # column that already exists makes pandas rename both copies to
    # '<name>_x' / '<name>_y'; the un-suffixed lookups below then find
    # nothing, the effective rule silently becomes 0 and every product is
    # flagged for update.
    cols_to_fetch = [
        c for c in rule_cols
        if c in df_qd_input.columns and c not in df_work.columns
    ]
    if cols_to_fetch:
        df_cart_merge = df_qd_input[key_cols + cols_to_fetch].drop_duplicates()
        df_work_cart = df_work.merge(df_cart_merge, on=key_cols, how='left')
    else:
        df_work_cart = df_work.copy()

    # Effective rule: new_cart_rule -> current_cart_rule -> 0.
    if 'current_cart_rule' in df_work_cart.columns:
        current_rule = df_work_cart['current_cart_rule']
    else:
        current_rule = 0

    if 'new_cart_rule' in df_work_cart.columns:
        df_work_cart['effective_cart_rule'] = df_work_cart['new_cart_rule'].fillna(current_rule)
    else:
        df_work_cart['effective_cart_rule'] = current_rule

    df_work_cart['effective_cart_rule'] = df_work_cart['effective_cart_rule'].fillna(0)

    # Largest quantity across whichever tier columns are present.
    # NOTE(review): tier flags (t1_f/t2_f/t3_f) are not consulted here, so a
    # quantity of a tier that was not activated still counts - confirm intended.
    tier_cols = [c for c in ['tier_1_qty', 'tier_2_qty', 'ws_qty'] if c in df_work_cart.columns]
    df_work_cart['max_tier_qty'] = df_work_cart[tier_cols].max(axis=1, skipna=True)

    # Rows with a NaN max_tier_qty compare False and are excluded, which
    # also keeps the integer cast below safe.
    needs_update = df_work_cart['max_tier_qty'] > df_work_cart['effective_cart_rule']
    cart_rules_update = df_work_cart[needs_update][['cohort_id', 'product_id', 'packing_unit_id', 'max_tier_qty']].copy()
    cart_rules_update = cart_rules_update.rename(columns={'max_tier_qty': 'new_cart_rule'})

    cart_rules_update['new_cart_rule'] = cart_rules_update['new_cart_rule'].round().astype(int)
    # Collapse duplicate keys (e.g. the same product in several warehouses
    # of one cohort) to the largest required rule.
    cart_rules_update = cart_rules_update.groupby(['cohort_id', 'product_id', 'packing_unit_id'])['new_cart_rule'].max().reset_index()

    return cart_rules_update


def post_cart_rules(cohort_id: int, filename: str) -> requests.Response:
    """Upload Cart Rules file for a specific cohort.

    Fetches an access token, then POSTs the Excel file at ``filename`` as a
    multipart form field named 'sheet' to the cohort's cart-rules endpoint.

    Args:
        cohort_id: Cohort whose cart rules are being replaced/updated.
        filename: Path of the .xlsx file to upload.

    Returns:
        The ``requests.Response`` of the upload call. The HTTP status is not
        checked here; callers inspect ``response.ok`` / ``status_code``.
    """
    token = get_access_token(
        'https://sso.maxab.info/auth/realms/maxab/protocol/openid-connect/token',
        'main-system-externals',
        API_SECRET
    )

    url = f"https://api.maxab.info/main-system/api/admin-portal/cohorts/{cohort_id}/cart-rules"
    headers = {'Authorization': f'bearer {token}'}

    # Open the file in a context manager so the handle is always closed,
    # including when the request raises. The response body is fully read
    # before the block exits (no streaming), so closing here is safe.
    with open(filename, 'rb') as sheet_fh:
        files = [
            ('sheet', (filename, sheet_fh,
                       'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'))
        ]
        # NOTE(review): no timeout is set on this upload; consider adding one
        # once the expected server-side processing time is known.
        response = requests.request("POST", url, headers=headers, data={}, files=files)

    return response


def upload_cart_rules(cart_rules_update: pd.DataFrame, dry_run: bool = True) -> dict:
    """Write one cart-rules sheet per cohort and upload it.

    Args:
        cart_rules_update: Frame with at least the columns ``cohort_id``,
            ``product_id``, ``packing_unit_id`` and ``new_cart_rule``.
        dry_run: When True, nothing is written or uploaded; each cohort is
            only reported and recorded as a success.

    Returns:
        Dict with two lists: ``'success'`` holds cohort ids, ``'failed'``
        holds ``{'cohort_id', 'error'}`` dicts where ``error`` is the first
        200 bytes of the response body.
    """
    outcome = {'success': [], 'failed': []}
    cohort_column = cart_rules_update['cohort_id']

    print(f"\n  Cart rules to update: {len(cart_rules_update)} products across {cohort_column.nunique()} cohorts")

    column_labels = {
        'product_id': 'Product ID',
        'packing_unit_id': 'Packing Unit ID',
        'new_cart_rule': 'Cart Rules',
    }

    for cohort in cohort_column.unique():
        cohort_rows = cart_rules_update.loc[cohort_column == cohort]

        # A value that does not compare equal to itself (e.g. NaN) selects
        # no rows; there is nothing to upload for it.
        if len(cohort_rows) == 0:
            continue

        req_data = cohort_rows[list(column_labels)].rename(columns=column_labels)
        filename = f'{QD_OUTPUT_DIR}/qd_v2_cart_rules_{cohort}.xlsx'

        if dry_run:
            print(f"    [DRY RUN] Cohort {cohort}: Would upload {len(req_data)} rules")
            outcome['success'].append(cohort)
            continue

        req_data.to_excel(filename, index=False)

        # Pause before each request so consecutive uploads are spaced out.
        time.sleep(2)
        response = post_cart_rules(cohort, filename)

        if not response.ok:
            print(f"    Cohort {cohort}: Upload failed ({response.status_code})")
            outcome['failed'].append({'cohort_id': cohort, 'error': response.content[:200]})
            continue

        print(f"    Cohort {cohort}: {len(req_data)} rules uploaded")
        outcome['success'].append(cohort)

    return outcome

# Cell-completion marker: printed once the definitions above have been executed.
print("✓ API functions defined")


In [None]:
# =============================================================================
# QD HANDLER V2 READY
# =============================================================================

# Summary banner emitted in a single call; each argument is one output line.
print(
    "="*70,
    "QD HANDLER V2 READY TO USE",
    "="*70,
    "\nAvailable functions:",
    "  - process_qd_v2(df_qd, dry_run=True)      : Main V2 function",
    "  - deactivate_active_qd(dry_run=True)      : Deactivate all active QDs",
    "  - apply_performance_adjustments(df)       : Apply conversion-based adjustments",
    "  - priority_selection_v2(df, limit=400)    : High DOH first selection",
    "\nV2 Key Features:",
    "  - Sequential adjustment: Lower qty first (2x), then boost discount (3x)",
    "  - Tier-specific thresholds: T1=3%, T2=7%, T3=5%",
    "  - High DOH priority: DOH>30 AND inv>10K ranked first",
    "  - Margin-based fallback for SKUs without market data",
    "  - Elasticity validation for T1/T2 only (T3 exempt)",
    "  - Wholesale min multiplier: 2x (was 3x)",
    "="*70,
    sep="\n",
)


In [None]:
# =============================================================================
# TEST CELL - Run this to test the V2 handler with sample data
# =============================================================================
# Builds a five-row sample frame, prints a short summary of it, runs
# process_qd_v2 in dry-run mode and prints selected fields of the result.
# NOTE(review): as a top-level cell this presumably also executes whenever the
# notebook is loaded via %run - confirm that is intended.

# Create sample test data with all required columns
sample_df = pd.DataFrame({
    # Identifiers
    'product_id': [12345, 67890, 11111, 22222, 33333],
    'warehouse_id': [1, 1, 236, 236, 337],
    'cohort_id': [700, 700, 701, 701, 702],
    'sku': ['Test SKU A', 'Test SKU B', 'Test SKU C', 'Test SKU D', 'Test SKU E'],
    'brand': ['Brand A', 'Brand B', 'Brand A', 'Brand C', 'Brand B'],
    'cat': ['Cat 1', 'Cat 1', 'Cat 2', 'Cat 2', 'Cat 1'],
    
    # Pricing data
    'wac_p': [10.0, 20.0, 15.0, 25.0, 30.0],
    'current_price': [15.0, 30.0, 22.0, 38.0, 45.0],
    'new_price': [None, None, None, None, None],
    'target_margin': [0.33, 0.33, 0.32, 0.34, 0.33],
    'min_boundary': [11.0, 22.0, 16.0, 27.0, 32.0],
    
    # Market margins (converted to prices internally)
    'below_market': [0.25, 0.28, 0.26, 0.30, 0.27],
    'market_min': [0.28, 0.30, 0.28, 0.32, 0.29],
    'market_25': [0.30, 0.32, 0.30, 0.34, 0.31],
    'market_50': [0.32, 0.34, 0.32, 0.36, 0.33],
    'market_75': [0.34, 0.36, 0.34, 0.38, 0.35],
    'market_max': [0.36, 0.38, 0.36, 0.40, 0.37],
    'above_market': [0.38, 0.40, 0.38, 0.42, 0.39],
    
    # Margin tiers
    'margin_tier_1': [0.30, 0.32, 0.30, 0.34, 0.31],
    'margin_tier_2': [0.28, 0.30, 0.28, 0.32, 0.29],
    'margin_tier_3': [0.26, 0.28, 0.26, 0.30, 0.27],
    'margin_tier_4': [0.24, 0.26, 0.24, 0.28, 0.25],
    'margin_tier_5': [0.22, 0.24, 0.22, 0.26, 0.23],
    'margin_tier_above_1': [0.32, 0.34, 0.32, 0.36, 0.33],
    'margin_tier_above_2': [0.34, 0.36, 0.34, 0.38, 0.35],
    
    # Performance data (V2 NEW)
    'responsive_doh': [15, 45, 10, 60, 25],  # SKU B and D are high DOH
    'stocks': [100, 500, 50, 800, 150],
    'doh': [15, 45, 10, 60, 25],
    't1_cntrb_uth': [5.0, 1.0, 8.0, 0.0, 3.0],  # Tier 1 contribution
    't2_cntrb_uth': [3.0, 0.5, 6.0, 0.0, 2.0],  # Tier 2 contribution
    't3_cntrb_uth': [1.0, 0.0, 3.0, 0.0, 1.0],  # Tier 3 contribution
    'has_active_qd': [True, True, False, True, False],
    'mtd_qty': [200, 150, 300, 100, 250],
    
    # QD configuration
    'keep_qd_tiers': [['T1', 'T2', 'T3'], ['T1', 'T2'], ['T1', 'T2', 'T3'], ['T1', 'T2', 'T3'], ['T1', 'T2']],
    
    # Cart rules
    'current_cart_rule': [5, 10, 5, 10, 5],
    'new_cart_rule': [None, None, None, None, None],
})

print("="*70)
print("SAMPLE TEST DATA")
print("="*70)
print(f"\nTotal SKUs: {len(sample_df)}")
print(f"Warehouses: {sample_df['warehouse_id'].unique().tolist()}")
print(f"\nHigh DOH SKUs (DOH>{HIGH_DOH_THRESHOLD}):")
high_doh = sample_df[sample_df['responsive_doh'] > HIGH_DOH_THRESHOLD]
for _, row in high_doh.iterrows():
    # Inventory value is computed here as units in stock times current price.
    inv_value = row['stocks'] * row['current_price']
    print(f"  - {row['sku']}: DOH={row['responsive_doh']}, inv_value={inv_value:.0f} EGP")

# Plain string: there are no placeholders, so no f-prefix is needed.
print("\nSKUs with existing QD (has_active_qd=True):")
# 'has_active_qd' is built from literal bools above, so the column is already
# a boolean mask and can index the frame directly (no `== True` comparison).
for _, row in sample_df[sample_df['has_active_qd']].iterrows():
    print(f"  - {row['sku']}: T1={row['t1_cntrb_uth']}%, T2={row['t2_cntrb_uth']}%, T3={row['t3_cntrb_uth']}%")

print("\n" + "="*70)
print("Running process_qd_v2 with dry_run=True...")
print("="*70)

# Run the V2 handler
result = process_qd_v2(sample_df, dry_run=True)

# Display results
print("\n" + "="*70)
print("RESULT INSPECTION")
print("="*70)
# The first four keys are read with [] and raise KeyError if absent; the
# last two fall back to 0 via .get().
print(f"\nProcessing mode: {result['mode']}")
print(f"Total input: {result['total_input']}")
print(f"Processed: {result['processed']}")
print(f"Failed: {result['failed']}")
print(f"Total tiers: {result.get('total_tiers', 0)}")
print(f"High DOH count: {result.get('high_doh_count', 0)}")

if 'df_work' in result and len(result['df_work']) > 0:
    df_result = result['df_work']
    print(f"\n--- Selected SKUs ({len(df_result)}) ---")
    display_cols = ['sku', 'warehouse_id', 'is_high_doh', 
                    'tier_1_qty', 'tier_1_disc_pct', 't1_action',
                    'tier_2_qty', 'tier_2_disc_pct', 't2_action',
                    'ws_qty', 'ws_discount_pct', 't3_action']
    # Show only the columns the handler actually produced.
    display_cols = [c for c in display_cols if c in df_result.columns]
    print(df_result[display_cols].to_string(index=False))

if 'qd_configs' in result and len(result['qd_configs']) > 0:
    print(f"\n--- QD Configs ({len(result['qd_configs'])}) ---")
    # Preview at most the first three configs.
    for config in result['qd_configs'][:3]:
        tiers_str = ", ".join([f"T{t['tier']}:qty={t['quantity']},disc={t['discount_pct']:.1f}%" for t in config['tiers']])
        print(f"  {config['sku']}: [{tiers_str}] (High DOH: {config.get('is_high_doh', 0)})")
