# Quantity Discount (QD) Pricing System

This notebook calculates tiered pricing and quantities for products across warehouses.

## Workflow:
1. **Setup** - Imports, connections, and configuration
2. **Product Selection** - Select top products per warehouse based on performance
3. **Quantity Tiers** - Calculate tier 1 and tier 2 quantities based on order history
4. **Market Prices** - Gather competitive pricing data
5. **Price Tiers** - Calculate discounted prices for each tier
6. **Wholesale Pricing** - Calculate wholesale prices for bulk orders
7. **Export** - Save results to Excel


## 1. Setup & Imports


In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
from datetime import datetime
import calendar
import json
from datetime import date, timedelta
from oauth2client.service_account import ServiceAccountCredentials
import setup_environment_2
import importlib
import import_ipynb
import warnings
from datetime import datetime, timedelta
import pytz  
import os
import snowflake.connector
import boto3
warnings.filterwarnings("ignore")
importlib.reload(setup_environment_2)
setup_environment_2.initialize_env()
import base64
from botocore.exceptions import ClientError
from requests import get
from pathlib import Path
import requests
import time
import gspread

ModuleNotFoundError: No module named 'oauth2client'

### Configuration Constants


In [2]:
# =============================================================================
# CONFIGURATION - Modify these parameters as needed
# =============================================================================

# Cohort IDs for QD program
COHORT_IDS = [700, 701, 702, 703, 704, 1123, 1124, 1125, 1126]

# Warehouse mappings: (region, warehouse_name, warehouse_id, cohort_id)
WAREHOUSE_MAPPING = [
    ('Cairo', 'El-Marg', 38, 700),
    ('Cairo', 'Mostorod', 1, 700),
    ('Giza', 'Barageel', 236, 701),
    ('Giza', 'Sakkarah', 962, 701),
    ('Delta West', 'El-Mahala', 337, 703),
    ('Delta West', 'Tanta', 8, 703),
    ('Delta East', 'Mansoura FC', 339, 704),
    ('Delta East', 'Sharqya', 170, 704),
    ('Upper Egypt', 'Assiut FC', 501, 1124),
    ('Upper Egypt', 'Bani sweif', 401, 1126),
    ('Upper Egypt', 'Menya Samalot', 703, 1123),
    ('Upper Egypt', 'Sohag', 632, 1125),
    ('Alexandria', 'Khorshed Alex', 797, 702),
]

# Excluded warehouse IDs
EXCLUDED_WAREHOUSES = [6, 9, 10]

# Pricing parameters
MAX_DISCOUNT_PCT = 5.0      # Maximum discount allowed from current price (%)
MIN_DISCOUNT_PCT = 0.35     # Minimum discount required from current price (%)
MIN_RATIO = 1.3             # Minimum discount-to-quantity ratio
MAX_RATIO = 3             # Maximum discount-to-quantity ratio

# Product selection thresholds
MIN_ORDERS = 20             # Minimum orders in 4 months
MIN_RETAILERS = 5           # Minimum unique retailers
MIN_NMV = 5000              # Minimum revenue (EGP)
MIN_VELOCITY = 0.5          # Minimum units per day

# Ranking parameters
TOP_PRODUCTS_PER_WAREHOUSE = 200   # Initial selection
FINAL_PRODUCTS_PER_WAREHOUSE = 133 # Final output

# Delivery fees
DELIVERY_FEE_CAIRO_GIZA = 25
DELIVERY_FEE_OTHER = 20

print("Configuration loaded successfully!")


Configuration loaded successfully!


### Database Connection Function


In [3]:
def snowflake_query(country, query, warehouse=None, columns=[], conn=None):
    import snowflake.connector
    
    con = snowflake.connector.connect(
        user =  os.environ["SNOWFLAKE_USERNAME"],
        account= os.environ["SNOWFLAKE_ACCOUNT"],
        password= os.environ["SNOWFLAKE_PASSWORD"],
        database =os.environ["SNOWFLAKE_DATABASE"]
    )

    try:
        cur = con.cursor()
        cur.execute("USE WAREHOUSE COMPUTE_WH")
        cur.execute(query)
        
        column_names = [col[0] for col in cur.description]
        
        results = cur.fetchall()
        
        if not results:
            out = pd.DataFrame(columns=[name.lower() for name in column_names])
        else:
            if len(columns) == 0:
                out = pd.DataFrame(np.array(results), columns=column_names)
                out.columns = out.columns.str.lower()
            else:
                out = pd.DataFrame(np.array(results), columns=columns)
                out.columns = out.columns.str.lower()
        
        return out
    except Exception as e:
        print(f"An error occurred: {e}")
        raise
    finally:
        cur.close()
        con.close()

In [4]:
query = '''
SHOW PARAMETERS LIKE 'TIMEZONE'
'''
x  = snowflake_query("Egypt",query)
zone_to_use = x['value'].values[0]

### Google Sheets Connection (Force Brands)


In [6]:
# scope = ["https://spreadsheets.google.com/feeds",
#          'https://www.googleapis.com/auth/spreadsheets',
#          "https://www.googleapis.com/auth/drive.file",
#          "https://www.googleapis.com/auth/drive"]
# creds = ServiceAccountCredentials.from_json_keyfile_dict(json.loads(setup_environment_2.get_secret("prod/maxab-sheets")), scope)
# client = gspread.authorize(creds)
# force_brands = client.open('QD_brands').worksheet('Include_brands')
# force_brands_df = pd.DataFrame(force_brands.get_all_records())
# if(force_brands_df.empty):
#     force_brands_df = pd.DataFrame(columns=['brand'])
#     brand_filter = ""
# else:
#     brand_filter = f"OR brand IN ({','.join([repr(b) for b in list(force_brands_df.brand.unique())])})"

APIError: {'code': 500, 'message': 'Internal error encountered.', 'status': 'INTERNAL'}

## 2. Product Selection

Select top-performing products per warehouse based on:
- Gross profit ranking (40% weight)
- Sales velocity ranking (25% weight)
- Order count ranking (20% weight)
- Retailer count ranking (15% weight)


In [7]:
query = ''' 
WITH rr AS (
    SELECT product_id, warehouse_id, rr
    FROM (
        SELECT *, 
               MAX(date) OVER (PARTITION BY product_id, warehouse_id) as max_date
        FROM finance.PREDICTED_RUNNING_RATES
        QUALIFY date = max_date
            AND date::date >= CURRENT_DATE - 14 
    )
),

stocks AS (
    SELECT 
        warehouse_id,
        product_id,
        SUM(stocks) as stocks,
        CASE 
            WHEN SUM(rr) > 0 THEN SUM(stocks) / SUM(rr) 
            ELSE SUM(stocks) 
        END as doh
    FROM (
        SELECT DISTINCT 
            product_warehouse.warehouse_id,
            product_warehouse.product_id,
            (product_warehouse.available_stock)::integer as stocks,
            COALESCE(rr.rr, 0) as rr 
        FROM product_warehouse
        JOIN products ON product_warehouse.product_id = products.id
        JOIN product_units ON products.unit_id = product_units.id
        LEFT JOIN rr ON rr.product_id = products.id 
            AND rr.warehouse_id = product_warehouse.warehouse_id
        WHERE product_warehouse.warehouse_id NOT IN (6, 9, 10)
            AND product_warehouse.is_basic_unit = 1
            AND product_warehouse.available_stock > 0 
    )
    GROUP BY warehouse_id, product_id
    HAVING doh >= 1
),

base AS (
    SELECT *, ROW_NUMBER() OVER (PARTITION BY retailer_id ORDER BY priority) as rnk 
    FROM (
        SELECT x.*, TAGGABLE_ID as retailer_id 
        FROM (
            SELECT id as cohort_id, name as cohort_name, priority, dynamic_tag_id 
            FROM cohorts 
            WHERE is_active = 'true'
                AND id IN (700,701,702,703,704,1123,1124,1125,1126)
        ) x 
        JOIN DYNAMIC_TAGgables dt ON x.dynamic_tag_id = dt.dynamic_tag_id
        WHERE dt.taggable_id not IN (
            SELECT taggable_id FROM DYNAMIC_TAGgables 
            WHERE dynamic_tag_id IN (2807, 2808, 2809, 2810, 2811, 2812)
        )
    )
    QUALIFY rnk = 1 
    ORDER BY cohort_id
),

-- Count total retailers per warehouse for penetration calculation
warehouse_retailer_counts AS (
    SELECT 
        whs.warehouse_id,
        COUNT(DISTINCT base.retailer_id) as total_warehouse_retailers
    FROM base
    CROSS JOIN (SELECT DISTINCT warehouse_id FROM (VALUES
            (38), (1), (236), (962), (337), (8), (339), (170), 
            (501), (401), (703), (632), (797)
        ) x(warehouse_id)
    ) whs
    GROUP BY whs.warehouse_id
),

-- Map cohorts to warehouses
cohort_warehouse_map AS (
    SELECT cohort_id, warehouse_id
    FROM (VALUES
        (700, 38),   -- Cairo -> El-Marg
        (700, 1),    -- Cairo -> Mostorod
        (701, 236),  -- Giza -> Barageel
        (701, 962),  -- Giza -> Sakkarah
        (703, 337),  -- Delta West -> El-Mahala
        (703, 8),    -- Delta West -> Tanta
        (704, 339),  -- Delta East -> Mansoura FC
        (704, 170),  -- Delta East -> Sharqya
        (1124, 501), -- Upper Egypt -> Assiut FC
        (1126, 401), -- Upper Egypt -> Bani sweif
        (1123, 703), -- Upper Egypt -> Menya Samalot
        (1125, 632), -- Upper Egypt -> Sohag
        (702, 797)   -- Alexandria -> Khorshed Alex
    ) x(cohort_id, warehouse_id)
),

-- Get pricing information by cohort (which maps to warehouse)
cohort_prices AS (
    SELECT  
        cpu.cohort_id,
        pu.product_id,
        pu.packing_unit_id,
        pu.basic_unit_count,
        AVG(cpu.price) as price
    FROM cohort_product_packing_units cpu
    JOIN PACKING_UNIT_PRODUCTS pu ON pu.id = cpu.product_packing_unit_id
    WHERE cpu.cohort_id IN (700,701,702,703,704,1123,1124,1125,1126)
        AND cpu.created_at::date <> '2023-07-31'
        AND cpu.is_customized = true
    GROUP BY 
        cpu.cohort_id,
        pu.product_id,
        pu.packing_unit_id,
        pu.basic_unit_count
),

-- Get live prices by cohort
live_cohort_prices AS (
    SELECT 
        cohort_id,
        product_id,
        pu_id as packing_unit_id,
        buc as basic_unit_count,
        NEW_PRICE as price
    FROM materialized_views.DBDP_PRICES
    WHERE created_at = CURRENT_DATE
        AND DATE_PART('hour', CURRENT_TIME) BETWEEN SPLIT_PART(time_slot, '-', 1)::int AND SPLIT_PART(time_slot, '-', 2)::int
        AND cohort_id IN (700,701,702,703,704,1123,1124,1125,1126)
),

-- Combine live and historical prices (live takes priority)
combined_cohort_prices AS (
    SELECT *
    FROM (
        SELECT *, 1 AS priority FROM live_cohort_prices
        UNION ALL
        SELECT *, 2 AS priority FROM cohort_prices
    )
    QUALIFY ROW_NUMBER() OVER (PARTITION BY cohort_id, product_id, packing_unit_id ORDER BY priority) = 1
),

-- Map cohort prices to warehouse prices
warehouse_prices AS (
    SELECT 
        cwm.warehouse_id,
        ccp.product_id,
        ccp.packing_unit_id,
        ccp.basic_unit_count,
        ccp.price
    FROM combined_cohort_prices ccp
    JOIN cohort_warehouse_map cwm ON cwm.cohort_id = ccp.cohort_id
    WHERE ccp.price IS NOT NULL
),

-- Get sales performance over last 4 months
product_performance AS (
    SELECT 
        w.name as warehouse,
        w.id as warehouse_id,
        pso.product_id,
        pso.packing_unit_id,
        CONCAT(products.name_ar, ' ', products.size, ' ', product_units.name_ar) as sku,
        brands.name_ar as brand,
        categories.name_ar as category,
        
        -- Core volume metrics
        COUNT(DISTINCT so.parent_sales_order_id) as total_orders,
        COUNT(DISTINCT so.retailer_id) as total_retailers,
        SUM(pso.purchased_item_count) as total_packing_units_sold,
        SUM(pso.purchased_item_count * pso.basic_unit_count) as total_basic_units_sold,
        
        -- Revenue and margin
        SUM(pso.total_price) as total_nmv,
        SUM(COALESCE(f.wac_p, 0) * pso.purchased_item_count * pso.basic_unit_count) as total_cogs,
        (SUM(pso.total_price) - SUM(COALESCE(f.wac_p, 0) * pso.purchased_item_count * pso.basic_unit_count)) / 
            NULLIF(SUM(pso.total_price), 0) as blended_margin,
        
        -- Average order metrics
        AVG(pso.purchased_item_count) as avg_packing_units_per_order,
        
        -- Velocity metrics (units per day)
        SUM(pso.purchased_item_count) / 120.0 as packing_units_per_day
        
    FROM product_sales_order pso
    JOIN sales_orders so ON so.id = pso.sales_order_id
    JOIN base ON base.retailer_id = so.retailer_id
    JOIN products ON products.id = pso.product_id
    JOIN brands ON products.brand_id = brands.id
    JOIN categories ON products.category_id = categories.id 
        AND categories.name_ar NOT LIKE '%سايب%'
    JOIN finance.all_cogs f ON f.product_id = pso.product_id
        AND f.from_date::date <= so.created_at::date
        AND f.to_date::date > so.created_at::date
    JOIN product_units ON product_units.id = products.unit_id
	join warehouses w on w.id = pso.warehouse_id
    
    WHERE TRUE
        AND so.created_at::date BETWEEN current_date - 60 AND CURRENT_DATE - 1
        AND so.sales_order_status_id NOT IN (7, 12)
        AND so.channel IN ('telesales', 'retailer')
        AND pso.purchased_item_count <> 0
        AND products.activation = 'true'
        AND w.id NOT IN (6, 9, 10)
    
    GROUP BY All
),

-- Add retailer penetration
product_performance_with_penetration AS (
    SELECT 
        pp.*,
        wrc.total_warehouse_retailers,
        (pp.total_retailers * 100.0 / NULLIF(wrc.total_warehouse_retailers, 0)) as retailer_penetration_pct
    FROM product_performance pp
    LEFT JOIN warehouse_retailer_counts wrc ON wrc.warehouse_id = pp.warehouse_id
),

-- Add pricing information at warehouse level
product_performance_with_price AS (
    SELECT 
        pp.*,
        COALESCE(wp.price, 0) as product_price,
        COALESCE(wp.basic_unit_count, 1) as basic_unit_count
    FROM product_performance_with_penetration pp
    LEFT JOIN warehouse_prices wp ON wp.warehouse_id = pp.warehouse_id
        AND wp.product_id = pp.product_id 
        AND wp.packing_unit_id = pp.packing_unit_id
),

-- Add quality filters to focus on high-potential products
qualified_products AS (
    SELECT 
        pp.warehouse,
        pp.warehouse_id,
        pp.product_id,
        pp.packing_unit_id,
        pp.sku,
        pp.brand,
        pp.category,
        pp.total_orders,
        pp.total_retailers,
        pp.total_packing_units_sold,
        pp.total_basic_units_sold,
        pp.total_nmv,
        pp.blended_margin,
        pp.avg_packing_units_per_order,
        pp.packing_units_per_day,
        pp.retailer_penetration_pct,
        pp.product_price,
        pp.basic_unit_count,
        s.doh,
        s.stocks,
        
        -- Calculate a simple volume-based score
        (pp.total_nmv * pp.blended_margin) as gross_profit,
        
        -- Rank by gross profit within warehouse
        ROW_NUMBER() OVER (PARTITION BY pp.warehouse_id ORDER BY (pp.total_nmv * pp.blended_margin) DESC) as gp_rank,
        
        -- Rank by velocity
        ROW_NUMBER() OVER (PARTITION BY pp.warehouse_id ORDER BY pp.packing_units_per_day DESC) as velocity_rank,
        
        -- Rank by orders
        ROW_NUMBER() OVER (PARTITION BY pp.warehouse_id ORDER BY pp.total_orders DESC) as order_rank,
        
        -- Rank by number of retailers
        ROW_NUMBER() OVER (PARTITION BY pp.warehouse_id ORDER BY pp.total_retailers DESC) as retailer_rank
        
    FROM product_performance_with_price pp
    JOIN stocks s ON s.product_id = pp.product_id 
        AND s.warehouse_id = pp.warehouse_id

),

-- Select top products using a combined scoring approach
top_products AS (
    SELECT 
        warehouse,
        warehouse_id,
        product_id,
        packing_unit_id,
        sku,
        brand,
        category,
        total_orders,
        total_retailers,
        total_packing_units_sold,
        total_basic_units_sold,
        ROUND(total_nmv, 2) as total_nmv,
        ROUND(blended_margin * 100, 2) as margin_pct,
        ROUND(avg_packing_units_per_order, 2) as avg_order_qty,
        ROUND(packing_units_per_day, 2) as units_per_day,
        ROUND(retailer_penetration_pct, 1) as retailer_penetration_pct,
        ROUND(gross_profit, 2) as gross_profit,
        ROUND(product_price, 2) as packing_unit_price,
        basic_unit_count,
        ROUND(product_price / NULLIF(basic_unit_count, 0), 2) as price_per_basic_unit,
        gp_rank,
        velocity_rank,
        order_rank,
        retailer_rank,
        ROUND(doh, 2) as days_on_hand,
        stocks as available_stock,
        
        -- Combined score: weighted average of ranks (lower is better)
        (gp_rank * 0.15 + velocity_rank * 0.20 + order_rank * 0.30 + retailer_rank * 0.35) as combined_rank_score
        
    FROM qualified_products
)

SELECT 
    warehouse,
    warehouse_id,
    product_id,
    packing_unit_id,
    sku,
    brand,
    category as cat,
    total_orders,
    total_retailers,
    total_packing_units_sold,
    total_basic_units_sold,
    total_nmv,
    margin_pct,
    avg_order_qty,
    units_per_day,
    retailer_penetration_pct,
    gross_profit,
    packing_unit_price,
    basic_unit_count,
    price_per_basic_unit,
    days_on_hand,
    available_stock,
    gp_rank as gross_profit_rank,
    velocity_rank,
    order_rank,
    retailer_rank,
    ROUND(combined_rank_score, 2) as combined_score,
    ROW_NUMBER() OVER (PARTITION BY warehouse ORDER BY combined_rank_score) as final_rank
FROM top_products
WHERE combined_rank_score <= 500  -- Adjust this to get more/fewer products
qualify final_rank<=200
ORDER BY warehouse, combined_rank_score;
'''
selected_products = snowflake_query("Egypt",query)
for col in selected_products.columns:
    selected_products[col] = pd.to_numeric(selected_products[col], errors='ignore') 

## 3. Quantity Tier Calculation

Calculate tier 1 and tier 2 quantities based on:
- Order history from frequent buyers (2+ orders)
- Statistical analysis (median, Q3, P85, P90, P95)
- IQR outlier removal


In [8]:
selected_df = selected_products[['warehouse_id', 'product_id', 'packing_unit_id']].values.tolist()
tuples_string = ','.join([f"({int(wh_id)}, {int(prod_id)}, {int(pu_id)})" for wh_id, prod_id, pu_id in selected_df])
query = f'''
WITH selected_products AS (
    SELECT warehouse_id, product_id, packing_unit_id
    FROM (VALUES
      {tuples_string}
    ) AS x(warehouse_id, product_id, packing_unit_id)
),

-- Same base filtering as product selection query
-- Retailers in QD cohorts AND in specific dynamic tags
base AS (
    SELECT *, ROW_NUMBER() OVER (PARTITION BY retailer_id ORDER BY priority) as rnk 
    FROM (
        SELECT x.*, TAGGABLE_ID as retailer_id 
        FROM (
            SELECT id as cohort_id, name as cohort_name, priority, dynamic_tag_id 
            FROM cohorts 
            WHERE is_active = 'true'
                AND id IN (700,701,702,703,704,1123,1124,1125,1126)
        ) x 
        JOIN DYNAMIC_TAGgables dt ON x.dynamic_tag_id = dt.dynamic_tag_id
        WHERE dt.taggable_id not IN (
            SELECT taggable_id FROM DYNAMIC_TAGgables 
            WHERE dynamic_tag_id IN (2807, 2808, 2809, 2810, 2811, 2812)
        )
    )
    QUALIFY rnk = 1 
),

raw_order_quantities AS (
    SELECT 
        whs.wh as warehouse,
        whs.warehouse_id,
        pso.product_id,
        pso.packing_unit_id,
        CONCAT(products.name_ar, ' ', products.size, ' ', product_units.name_ar) as sku,
        brands.name_ar as brand,
        categories.name_ar as category,
        so.parent_sales_order_id,
        so.retailer_id,
        so.created_at::date as order_date,
        SUM(pso.purchased_item_count) as order_qty,
        SUM(pso.total_price) as order_value,
        -- ADD RECENCY WEIGHT: Recent orders get higher weight (exponential decay)
        EXP(-0.02 * DATEDIFF('day', so.created_at::date, CURRENT_DATE)) as recency_weight
        
    FROM product_sales_order pso
    JOIN sales_orders so ON so.id = pso.sales_order_id
    -- Filter to only include retailers from base (same cohorts + tags as product selection)
    JOIN base ON base.retailer_id = so.retailer_id
    JOIN products ON products.id = pso.product_id
    JOIN brands ON products.brand_id = brands.id
    JOIN categories ON products.category_id = categories.id
    JOIN product_units ON product_units.id = products.unit_id
    JOIN materialized_views.retailer_polygon ON materialized_views.retailer_polygon.retailer_id = so.retailer_id
    JOIN districts ON districts.id = materialized_views.retailer_polygon.district_id
    JOIN cities ON cities.id = districts.city_id
    JOIN states ON states.id = cities.state_id
    JOIN regions ON regions.id = states.region_id
    JOIN (SELECT * FROM (VALUES
            ('Cairo', 'El-Marg', 38),
            ('Cairo', 'Mostorod', 1),
            ('Giza', 'Barageel', 236),
            ('Giza', 'Sakkarah', 962),
            ('Delta West', 'El-Mahala', 337),
            ('Delta West', 'Tanta', 8),
            ('Delta East', 'Mansoura FC', 339),
            ('Delta East', 'Sharqya', 170),
            ('Upper Egypt', 'Assiut FC', 501),
            ('Upper Egypt', 'Bani sweif', 401),
            ('Upper Egypt', 'Menya Samalot', 703),
            ('Upper Egypt', 'Sohag', 632),
            ('Alexandria', 'Khorshed Alex', 797)
        ) x(region_name, wh, warehouse_id)
    ) whs ON whs.region_name = CASE WHEN regions.id = 2 THEN states.name_en ELSE regions.name_en END
    JOIN selected_products sp ON sp.warehouse_id = whs.warehouse_id 
        AND sp.product_id = pso.product_id
        AND sp.packing_unit_id = pso.packing_unit_id
    
    WHERE TRUE
        AND so.created_at::date BETWEEN DATE_TRUNC('month', CURRENT_DATE - INTERVAL '4 months') AND CURRENT_DATE - 1
        AND so.sales_order_status_id NOT IN (7, 12)
        AND so.channel IN ('telesales', 'retailer')
        AND pso.purchased_item_count <> 0
        AND products.activation = 'true'
    
    GROUP BY 
        whs.wh,
        whs.warehouse_id,
        pso.product_id,
        pso.packing_unit_id,
        products.name_ar,
        products.size,
        product_units.name_ar,
        brands.name_ar,
        categories.name_ar,
        so.parent_sales_order_id,
        so.retailer_id,
        so.created_at::date
),

retailer_frequency AS (
    SELECT 
        warehouse_id,
        product_id,
        packing_unit_id,
        retailer_id,
        COUNT(DISTINCT parent_sales_order_id) as order_count,
        COUNT(DISTINCT DATE_TRUNC('week', order_date)) as weeks_ordered,
        MIN(order_date) as first_order_date,
        MAX(order_date) as last_order_date,
        DATEDIFF('day', MIN(order_date), MAX(order_date)) as days_span,
        CASE 
            WHEN COUNT(DISTINCT parent_sales_order_id) > 1 
            THEN DATEDIFF('day', MIN(order_date), MAX(order_date)) / (COUNT(DISTINCT parent_sales_order_id) - 1)
            ELSE NULL 
        END as avg_days_between_orders
    FROM raw_order_quantities
    GROUP BY warehouse_id, product_id, packing_unit_id, retailer_id
),

frequent_buyers AS (
    SELECT 
        warehouse_id,
        product_id,
        packing_unit_id,
        retailer_id,
        order_count,
        weeks_ordered,
        avg_days_between_orders
    FROM retailer_frequency
    WHERE order_count >= 2 
       OR weeks_ordered >= 2
),

filtered_orders AS (
    SELECT roq.*
    FROM raw_order_quantities roq
    JOIN frequent_buyers fb 
        ON fb.warehouse_id = roq.warehouse_id
        AND fb.product_id = roq.product_id
        AND fb.packing_unit_id = roq.packing_unit_id
        AND fb.retailer_id = roq.retailer_id
),

initial_stats AS (
    SELECT 
        warehouse_id,
        product_id,
        packing_unit_id,
        PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY order_qty) as q1,
        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY order_qty) as q3,
        MEDIAN(order_qty) as median_qty,
        STDDEV_POP(order_qty) as stddev_qty,
        AVG(order_qty) as avg_qty
    FROM filtered_orders
    GROUP BY warehouse_id, product_id, packing_unit_id
),

cleaned_orders AS (
    SELECT fo.*
    FROM filtered_orders fo
    JOIN initial_stats ist 
        ON ist.warehouse_id = fo.warehouse_id
        AND ist.product_id = fo.product_id
        AND ist.packing_unit_id = fo.packing_unit_id
    WHERE TRUE
        AND fo.order_qty >= ist.q1 - 1.5 * (ist.q3 - ist.q1)
        AND fo.order_qty <= ist.q3 + 1.5 * (ist.q3 - ist.q1)
        AND (ist.stddev_qty = 0 
             OR ABS(fo.order_qty - ist.avg_qty) <= 3 * ist.stddev_qty)
),

-- MODIFIED: Recent orders stats (last 15 days)
recent_trends AS (
    SELECT 
        warehouse_id,
        product_id,
        packing_unit_id,
        -- Weighted average gives more importance to recent orders
        SUM(order_qty * recency_weight) / NULLIF(SUM(recency_weight), 0) as weighted_avg_qty,
        -- Last 15 days statistics
        AVG(CASE WHEN order_date >= CURRENT_DATE - 15 THEN order_qty END) as last_15d_avg,
        MEDIAN(CASE WHEN order_date >= CURRENT_DATE - 15 THEN order_qty END) as last_15d_median,
        MAX(CASE WHEN order_date >= CURRENT_DATE - 15 THEN order_qty END) as last_15d_max,
        COUNT(CASE WHEN order_date >= CURRENT_DATE - 15 THEN 1 END) as last_15d_orders
    FROM cleaned_orders
    GROUP BY warehouse_id, product_id, packing_unit_id
),

quantity_stats AS (
    SELECT 
        warehouse,
        warehouse_id,
        product_id,
        packing_unit_id,
        sku,
        brand,
        category,
        
        COUNT(DISTINCT parent_sales_order_id) as total_orders,
        COUNT(DISTINCT retailer_id) as total_retailers,
        
        MIN(order_qty) as min_qty,
        MAX(order_qty) as max_qty,
        AVG(order_qty) as avg_qty,
        MEDIAN(order_qty) as median_qty,
        STDDEV_POP(order_qty) as stddev_qty,
        
        PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY order_qty) as q1_qty,
        PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY order_qty) as q2_qty,
        PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY order_qty) as q3_qty,
        PERCENTILE_CONT(0.85) WITHIN GROUP (ORDER BY order_qty) as p85_qty,
        PERCENTILE_CONT(0.90) WITHIN GROUP (ORDER BY order_qty) as p90_qty,
        PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY order_qty) as p95_qty,
        
        SUM(order_value) as total_revenue,
        AVG(order_value) as avg_order_value
        
    FROM cleaned_orders
    GROUP BY 
        warehouse,
        warehouse_id,
        product_id,
        packing_unit_id,
        sku,
        brand,
        category
),

frequency_table AS (
    SELECT
        warehouse_id,
        product_id,
        packing_unit_id,
        order_qty,
        COUNT(DISTINCT parent_sales_order_id) AS freq
    FROM cleaned_orders
    GROUP BY warehouse_id, product_id, packing_unit_id, order_qty
),

lag_lead AS (
    SELECT
        warehouse_id,
        product_id,
        packing_unit_id,
        order_qty,
        freq,
        LAG(freq) OVER (PARTITION BY warehouse_id, product_id, packing_unit_id ORDER BY order_qty) AS prev_freq,
        LEAD(freq) OVER (PARTITION BY warehouse_id, product_id, packing_unit_id ORDER BY order_qty) AS next_freq
    FROM frequency_table
),

most_frequent_qty AS (
    SELECT 
        warehouse_id,
        product_id,
        packing_unit_id,
        order_qty as mode_qty,
        freq as mode_freq,
        freq * 1.0 / SUM(freq) OVER (PARTITION BY warehouse_id, product_id, packing_unit_id) as mode_contribution
    FROM (
        SELECT *,
               ROW_NUMBER() OVER (PARTITION BY warehouse_id, product_id, packing_unit_id ORDER BY freq DESC, order_qty DESC) as rn
        FROM lag_lead
        WHERE (freq > COALESCE(prev_freq, -1))
          AND (freq > COALESCE(next_freq, -1))
    )
    WHERE rn = 1
),

frequency_metrics AS (
    SELECT 
        fb.warehouse_id,
        fb.product_id,
        fb.packing_unit_id,
        COUNT(DISTINCT fb.retailer_id) as frequent_retailer_count,
        AVG(fb.order_count) as avg_orders_per_retailer,
        AVG(fb.avg_days_between_orders) as avg_refill_days,
        MEDIAN(fb.avg_days_between_orders) as median_refill_days
    FROM frequent_buyers fb
    GROUP BY fb.warehouse_id, fb.product_id, fb.packing_unit_id
),

tier_calculations AS (
    SELECT 
        qs.*,
        COALESCE(mf.mode_qty, qs.median_qty) as mode_qty,
        COALESCE(mf.mode_freq, 0) as mode_freq,
        COALESCE(mf.mode_contribution, 0) as mode_contribution,
        COALESCE(fm.frequent_retailer_count, 0) as frequent_retailer_count,
        COALESCE(fm.avg_orders_per_retailer, 0) as avg_orders_per_retailer,
        COALESCE(fm.avg_refill_days, 0) as avg_refill_days,
        COALESCE(fm.median_refill_days, 0) as median_refill_days,
        
        -- ADD: Recency metrics
        rt.weighted_avg_qty,
        rt.last_15d_avg,
        rt.last_15d_median,
        rt.last_15d_max,
        rt.last_15d_orders,
        
        -- MODIFIED: Tier 1 with 15-day recency factor
        -- Blends historical median with recent trends (70% historical, 30% recent)
        CEIL(GREATEST(
            (0.7 * qs.median_qty + 0.3 * COALESCE(rt.weighted_avg_qty, qs.median_qty)) + 1.0 * COALESCE(qs.stddev_qty, 1),
            qs.q3_qty,
            COALESCE(mf.mode_qty, qs.median_qty) + GREATEST(3, qs.median_qty * 0.3),
            -- If recent 15 days show growth, adjust upward
            CASE 
                WHEN rt.last_15d_orders >= 3 AND rt.last_15d_median > qs.median_qty 
                THEN rt.last_15d_median * 1.2
                ELSE qs.median_qty * 1.4
            END,
            qs.median_qty + 3
        )) as tier_1_qty,
        
        -- MODIFIED: Tier 2 with 15-day recency factor
        CEIL(GREATEST(
            qs.q3_qty + 1.5 * COALESCE(qs.stddev_qty, 1),
            qs.p85_qty + 1.0 * COALESCE(qs.stddev_qty, 1),
            qs.p90_qty + 0.5 * COALESCE(qs.stddev_qty, 1),
            qs.p95_qty,
            -- Blend historical and weighted average
            (0.6 * qs.median_qty + 0.4 * COALESCE(rt.weighted_avg_qty, qs.median_qty)) * 2.0,
            -- If last 15 days show higher demand, adjust tier 2 upward
            CASE 
                WHEN rt.last_15d_orders >= 3 AND rt.last_15d_max > qs.p90_qty 
                THEN rt.last_15d_max * 1.1
                ELSE qs.median_qty * 2.0
            END
        )) as tier_2_qty_base
        
    FROM quantity_stats qs
    LEFT JOIN most_frequent_qty mf 
        ON mf.warehouse_id = qs.warehouse_id 
        AND mf.product_id = qs.product_id
        AND mf.packing_unit_id = qs.packing_unit_id
    LEFT JOIN frequency_metrics fm
        ON fm.warehouse_id = qs.warehouse_id
        AND fm.product_id = qs.product_id
        AND fm.packing_unit_id = qs.packing_unit_id
    LEFT JOIN recent_trends rt
        ON rt.warehouse_id = qs.warehouse_id
        AND rt.product_id = qs.product_id
        AND rt.packing_unit_id = qs.packing_unit_id
),

tier_adjustments AS (
    SELECT 
        warehouse,
        warehouse_id,
        product_id,
        packing_unit_id,
        sku,
        brand,
        category,
        total_orders,
        total_retailers,
        min_qty,
        avg_qty,
        median_qty,
        stddev_qty,
        q1_qty,
        q3_qty,
        p85_qty,
        p90_qty,
        p95_qty,
        max_qty,
        mode_qty,
        mode_freq,
        mode_contribution,
        frequent_retailer_count,
        avg_orders_per_retailer,
        avg_refill_days,
        median_refill_days,
        total_revenue,
        avg_order_value,
        
        -- ADD: Recency metrics to output
        weighted_avg_qty,
        last_15d_avg,
        last_15d_median,
        last_15d_max,
        last_15d_orders,
        
        tier_1_qty,
        LEAST(
            CEIL(GREATEST(
                tier_2_qty_base,
                tier_1_qty * 1.6
            )),
            GREATEST(
                tier_1_qty * 3.5,
                tier_1_qty + 20
            )
        ) as tier_2_qty
        
    FROM tier_calculations
),

retailer_distribution AS (
    SELECT 
        co.warehouse_id,
        co.product_id,
        co.packing_unit_id,
        ta.tier_1_qty,
        ta.tier_2_qty,
        COUNT(DISTINCT CASE 
            WHEN co.order_qty < ta.tier_1_qty THEN co.retailer_id 
        END) as retailers_below_t1,
        COUNT(DISTINCT CASE 
            WHEN co.order_qty >= ta.tier_1_qty AND co.order_qty < ta.tier_2_qty THEN co.retailer_id 
        END) as retailers_at_t1,
        COUNT(DISTINCT CASE 
            WHEN co.order_qty >= ta.tier_2_qty THEN co.retailer_id 
        END) as retailers_at_t2,
        COUNT(CASE 
            WHEN co.order_qty < ta.tier_1_qty THEN 1 
        END) as orders_below_t1,
        COUNT(CASE 
            WHEN co.order_qty >= ta.tier_1_qty AND co.order_qty < ta.tier_2_qty THEN 1 
        END) as orders_at_t1,
        COUNT(CASE 
            WHEN co.order_qty >= ta.tier_2_qty THEN 1 
        END) as orders_at_t2
    FROM cleaned_orders co
    JOIN tier_adjustments ta 
        ON ta.warehouse_id = co.warehouse_id 
        AND ta.product_id = co.product_id
        AND ta.packing_unit_id = co.packing_unit_id
    GROUP BY 
        co.warehouse_id,
        co.product_id,
        co.packing_unit_id,
        ta.tier_1_qty,
        ta.tier_2_qty
)

SELECT 
    ta.warehouse,
    ta.warehouse_id,
    ta.product_id,
    ta.packing_unit_id,
    ta.sku,
    ta.brand,
    ta.category,
    
    ta.frequent_retailer_count,
    ROUND(ta.avg_orders_per_retailer, 2) as avg_orders_per_retailer,
    ROUND(ta.avg_refill_days, 1) as avg_refill_days,
    ROUND(ta.median_refill_days, 1) as median_refill_days,
    
    ta.total_orders,
    ta.total_retailers,
    
    ta.min_qty,
    ROUND(ta.avg_qty, 2) as avg_qty,
    ta.median_qty,
    ROUND(ta.weighted_avg_qty, 2) as weighted_avg_qty,
    ta.q1_qty as q1_25_qty,
    ta.q3_qty as q3_75_qty,
    ta.p85_qty,
    ta.p90_qty,
    ta.p95_qty,
    ta.max_qty,
    ROUND(ta.stddev_qty, 2) as stddev_qty,
    ta.mode_qty,
    ta.mode_freq,
    ROUND(ta.mode_contribution * 100, 1) as mode_pct,
    
    -- MODIFIED: 15-day trend metrics
    ROUND(ta.last_15d_avg, 2) as last_15d_avg,
    ta.last_15d_median,
    ta.last_15d_max,
    ta.last_15d_orders,
    
    ta.tier_1_qty,
    ta.tier_2_qty,
    ROUND((ta.tier_1_qty - ta.median_qty) * 100.0 / NULLIF(ta.median_qty, 0), 1) as tier_1_increase_pct,
    ROUND((ta.tier_2_qty - ta.median_qty) * 100.0 / NULLIF(ta.median_qty, 0), 1) as tier_2_increase_pct,
    ROUND(ta.tier_2_qty * 1.0 / NULLIF(ta.tier_1_qty, 0), 2) as tier_2_to_tier_1_ratio,
    
    rd.retailers_below_t1,
    rd.retailers_at_t1,
    rd.retailers_at_t2,
    
    rd.orders_below_t1,
    rd.orders_at_t1,
    rd.orders_at_t2,
    
    ROUND(100.0 * rd.retailers_below_t1 / NULLIF(ta.total_retailers, 0), 1) as pct_retailers_below_t1,
    ROUND(100.0 * rd.retailers_at_t1 / NULLIF(ta.total_retailers, 0), 1) as pct_retailers_at_t1,
    ROUND(100.0 * rd.retailers_at_t2 / NULLIF(ta.total_retailers, 0), 1) as pct_retailers_at_t2,
    
    ROUND(100.0 * rd.orders_below_t1 / NULLIF(ta.total_orders, 0), 1) as pct_orders_below_t1,
    ROUND(100.0 * rd.orders_at_t1 / NULLIF(ta.total_orders, 0), 1) as pct_orders_at_t1,
    ROUND(100.0 * rd.orders_at_t2 / NULLIF(ta.total_orders, 0), 1) as pct_orders_at_t2,
    
    ROUND(ta.total_revenue, 2) as total_revenue,
    ROUND(ta.avg_order_value, 2) as avg_order_value

FROM tier_adjustments ta
JOIN retailer_distribution rd 
    ON rd.warehouse_id = ta.warehouse_id 
    AND rd.product_id = ta.product_id
    AND rd.packing_unit_id = ta.packing_unit_id
ORDER BY ta.warehouse, ta.total_orders DESC
'''
tiers_selection = snowflake_query("Egypt",query)
for col in tiers_selection.columns:
    tiers_selection[col] = pd.to_numeric(tiers_selection[col], errors='ignore') 


### SKU Information & Cost Data


In [9]:
query = f'''
SELECT  DIStinct  
		products.id as product_id,
		CONCAT(products.name_ar,' ',products.size,' ',product_units.name_ar) as sku,
		brands.name_ar as brand, 
		categories.name_ar as cat,
		f.wac_p
from products 
JOIN brands on products.brand_id = brands.id 
JOIN categories ON products.category_id = categories.id
JOIN finance.all_cogs f  ON f.product_id = products.id and CONVERT_TIMEZONE('{zone_to_use}', 'Africa/Cairo', CURRENT_TIMEstamp()) between f.from_date and f.to_date 
JOIN product_units ON product_units.id = products.unit_id 
'''
sku_info  = snowflake_query("Egypt",query)
sku_info.product_id=pd.to_numeric(sku_info.product_id)
sku_info.wac_p=pd.to_numeric(sku_info.wac_p)

## 4. Market Prices

Gather competitive pricing data from multiple sources:
- **Marketplace prices** - Regional marketplace data with fallbacks
- **Ben Soliman prices** - Competitor pricing
- **Scraped prices** - Web-scraped competitor data
- **Product statistics** - Historical margin boundaries

### 4.1 Marketplace Prices


In [10]:
query = f'''
WITH whs as (SELECT *
             FROM   (values
                            ('Cairo', 'El-Marg', 38,700),
                            ('Cairo', 'Mostorod', 1,700),
                            ('Giza', 'Barageel', 236,701),
                            ('Delta West', 'El-Mahala', 337,703),
                            ('Delta West', 'Tanta', 8,703),
                            ('Delta East', 'Mansoura FC', 339,704),
                            ('Delta East', 'Sharqya', 170,704),
                            ('Upper Egypt', 'Assiut FC', 501,1124),
                            ('Upper Egypt', 'Bani sweif', 401,1126),
                            ('Upper Egypt', 'Menya Samalot', 703,1123),
                            ('Upper Egypt', 'Sohag', 632,1125),
                            ('Alexandria', 'Khorshed Alex', 797,702),
							('Giza', 'Sakkarah', 962,701)
							
							)
                    x(region, wh, warehouse_id,cohort_id)),
full_data as (
select products.id as product_id, region,warehouse_id
from products , whs 
where activation = 'true'
),				

MP as (
select region,product_id,
min(min_price) as min_price,
min(max_price) as max_price,
min(mod_price) as mod_price,
min(true_min) as true_min,
min(true_max) as true_max

from (
select mp.region,mp.product_id,mp.pu_id,
min_price/BASIC_UNIT_COUNT as min_price,
max_price/BASIC_UNIT_COUNT as max_price,
mod_price/BASIC_UNIT_COUNT as mod_price,
TRUE_MIN_PRICE/BASIC_UNIT_COUNT as true_min,
TRUE_MAX_PRICE/BASIC_UNIT_COUNT as true_max
from materialized_views.marketplace_prices mp 
join packing_unit_products pup on pup.product_id = mp.product_id and pup.packing_unit_id = mp.pu_id
join finance.all_cogs f on f.product_id = mp.product_id and CURRENT_TIMESTAMP between f.from_date and f.to_date
where  least(min_price,mod_price) between wac_p*0.9 and wac_p*1.3 
)
group by all 
),
region_mapping AS (
    SELECT * 
	FROM 
	(	VALUES
        ('Delta East', 'Delta West'),
        ('Delta West', 'Delta East'),
        ('Alexandria', 'Cairo'),
        ('Alexandria', 'Giza'),
        ('Upper Egypt', 'Cairo'),
        ('Upper Egypt', 'Giza'),
		('Cairo','Giza'),
		('Giza','Cairo'),
		('Delta West', 'Cairo'),
		('Delta East', 'Cairo'),
		('Delta West', 'Giza'),
		('Delta East', 'Giza')
		)
    AS region_mapping(region, fallback_region)
)


select region,warehouse_id,product_id,
min(final_min_price) as final_min_price,
min(final_max_price) as final_max_price,
min(final_mod_price) as final_mod_price,
min(final_true_min) as final_true_min,
min(final_true_max) as final_true_max

from (
SELECT
distinct 
	w.region,
    w.warehouse_id,
	w.product_id,
    COALESCE(m1.min_price, m2.min_price) AS final_min_price,
    COALESCE(m1.max_price, m2.max_price) AS final_max_price,
    COALESCE(m1.mod_price, m2.mod_price) AS final_mod_price,
	COALESCE(m1.true_min, m2.true_min) AS final_true_min,
	COALESCE(m1.true_max, m2.true_max) AS final_true_max,
FROM full_data w
LEFT JOIN MP m1
    ON w.region = m1.region and w.product_id = m1.product_id
JOIN region_mapping rm
    ON w.region = rm.region
LEFT JOIN MP m2
    ON rm.fallback_region = m2.region
   AND w.product_id = m2.product_id
)
where final_min_price is not null 
group by all 
'''
marketplace = snowflake_query("Egypt",query)
marketplace.columns = marketplace.columns.str.lower()
for col in marketplace.columns:
    marketplace[col] = pd.to_numeric(marketplace[col], errors='ignore')

### 4.2 Ben Soliman (Competitor) Prices


In [11]:
query = f'''
select z.* 
from (
select maxab_product_id as product_id,avg(bs_final_price) as ben_soliman_price
from (
select * , row_number()over(partition by maxab_product_id order by diff) as rnk_2
from (
select *,(bs_final_price-wac_p)/wac_p as diff_2
from (
select * ,bs_price/maxab_basic_unit_count as bs_final_price
from (
select *,row_number()over(partition by maxab_product_id,maxab_pu order by diff) as rnk 
from (
select sm.* ,max(INJECTION_DATE::date)over(partition by maxab_product_id,maxab_pu) as max_date,wac1,wac_p,abs(bs_price-(wac_p*maxab_basic_unit_count))/(wac_p*maxab_basic_unit_count) as diff 
from materialized_views.savvy_mapping sm 
join finance.all_cogs f on f.product_id = sm.maxab_product_id and current_timestamp between f.from_Date and f.to_date
where bs_price is not null 
and INJECTION_DATE::date >= CURRENT_DATE- 5
qualify INJECTION_DATE::date = max_date
)
qualify rnk = 1 
)
)
where diff_2 between -0.5 and 0.5 
)
qualify rnk_2 = 1 
)
group by all
)z 
join finance.all_cogs f on f.product_id = z.product_id and current_timestamp between f.from_Date and f.to_date

where ben_soliman_price between f.wac_p*0.9 and f.wac_p*1.3
'''

bensoliman =  snowflake_query("Egypt",query)
bensoliman.columns = bensoliman.columns.str.lower()
for col in bensoliman.columns:
    bensoliman[col] = pd.to_numeric(bensoliman[col], errors='ignore')         

### 4.3 Scraped Competitor Prices


In [12]:
query = f'''
WITH whs as (SELECT *
             FROM   (values
                            ('Cairo', 'El-Marg', 38,700),
                            ('Cairo', 'Mostorod', 1,700),
                            ('Giza', 'Barageel', 236,701),
                            ('Delta West', 'El-Mahala', 337,703),
                            ('Delta West', 'Tanta', 8,703),
                            ('Delta East', 'Mansoura FC', 339,704),
                            ('Delta East', 'Sharqya', 170,704),
                            ('Upper Egypt', 'Assiut FC', 501,1124),
                            ('Upper Egypt', 'Bani sweif', 401,1126),
                            ('Upper Egypt', 'Menya Samalot', 703,1123),
                            ('Upper Egypt', 'Sohag', 632,1125),
                            ('Alexandria', 'Khorshed Alex', 797,702),
							('Giza', 'Sakkarah', 962,701)
							
							)
                    x(region, wh, warehouse_id,cohort_id))
select product_id,x.region,warehouse_id,min(MARKET_PRICE) as min_scrapped,max(MARKET_PRICE) as max_scrapped,median(MARKET_PRICE) as median_scrapped
from (
select MATERIALIZED_VIEWS.CLEANED_MARKET_PRICES.*,max(date)over(partition by region,MATERIALIZED_VIEWS.CLEANED_MARKET_PRICES.product_id,competitor) as max_date
from MATERIALIZED_VIEWS.CLEANED_MARKET_PRICES
join finance.all_cogs f on f.product_id = MATERIALIZED_VIEWS.CLEANED_MARKET_PRICES.product_id and CURRENT_TIMESTAMP between f.from_date and f.to_date 
where date>= current_date -5
and MARKET_PRICE between f.wac_p * 0.9 and wac_p*1.3
qualify date = max_date 
) x 
left join whs on whs.region = x.region
group by all 
'''
scrapped_prices = snowflake_query("Egypt",query)
scrapped_prices.columns = scrapped_prices.columns.str.lower()
for col in scrapped_prices.columns:
    scrapped_prices[col] = pd.to_numeric(scrapped_prices[col], errors='ignore')   

### 4.4 Product Statistics (Margin Boundaries)


In [13]:
query = f'''
select region,product_id,optimal_bm,MIN_BOUNDARY,MAX_BOUNDARY,MEDIAN_BM
from (
select region,product_id,target_bm,optimal_bm,MIN_BOUNDARY,MAX_BOUNDARY,MEDIAN_BM,max(created_at) over(partition by product_id,region) as max_date,created_at
from materialized_views.PRODUCT_STATISTICS
where created_at::date >= date_trunc('month',current_date - 60)
qualify max_date = created_at
)

'''
 
stats = snowflake_query("Egypt",query)
stats.columns = stats.columns.str.lower()
for col in stats.columns:
    stats[col] = pd.to_numeric(stats[col], errors='ignore')    

### 4.5 Warehouse-Region Mapping


In [14]:
query = f'''
select warehouse_id,region
from (
select * ,row_number()over(partition by warehouse_id order by nmv desc) as rnk 
from (
SELECT case when regions.id = 2 then cities.name_en else regions.name_en end as region,
	   pso.warehouse_id,
        sum(pso.total_price) as nmv



FROM product_sales_order pso
JOIN sales_orders so ON so.id = pso.sales_order_id
JOIN materialized_views.retailer_polygon on materialized_views.retailer_polygon.retailer_id=so.retailer_id
JOIN districts on districts.id=materialized_views.retailer_polygon.district_id
JOIN cities on cities.id=districts.city_id
join states on states.id=cities.state_id
join regions on regions.id=states.region_id             

WHERE   True
    AND so.created_at ::date between current_date-31 and CURRENT_DATE-1
    AND so.sales_order_status_id not in (7,12)
    AND so.channel IN ('telesales','retailer')
    AND pso.purchased_item_count <> 0

GROUP BY ALL
)
qualify rnk = 1 
)
'''
warehouse_region = snowflake_query("Egypt",query)
warehouse_region.columns = warehouse_region.columns.str.lower()
for col in warehouse_region.columns:
    warehouse_region[col] = pd.to_numeric(warehouse_region[col], errors='ignore')    

### 4.6 Target Margins (Brand/Category)


In [15]:
query = f'''
SELECT DISTINCT cat, brand, margin as target_bm
FROM    performance.commercial_targets cplan
QUALIFY CASE WHEN DATE_TRUNC('month', MAX(DATE)OVER()) = DATE_TRUNC('month', CURRENT_DATE) THEN DATE_TRUNC('month', CURRENT_DATE)
ELSE DATE_TRUNC('month', CURRENT_DATE - INTERVAL '1 month') END = DATE_TRUNC('month', date)
'''
brand_cat_target  = snowflake_query("Egypt",query)
brand_cat_target.target_bm=pd.to_numeric(brand_cat_target.target_bm)

query = f'''
select cat,sum(target_bm *(target_nmv/cat_total)) as cat_target_margin
from (
select *,sum(target_nmv)over(partition by cat) as cat_total
from (
select cat,brand,avg(target_bm) as target_bm , sum(target_nmv) as target_nmv
from (
SELECT DISTINCT date,city as region,cat, brand, margin as target_bm,nmv as target_nmv
FROM    performance.commercial_targets cplan
QUALIFY CASE WHEN DATE_TRUNC('month', MAX(DATE)OVER()) = DATE_TRUNC('month', CURRENT_DATE) THEN DATE_TRUNC('month', CURRENT_DATE)
ELSE DATE_TRUNC('month', CURRENT_DATE - INTERVAL '1 month') END = DATE_TRUNC('month', date)
)
group by all
)
)
group by all 
'''
cat_target  =snowflake_query("Egypt",query)
cat_target.cat_target_margin=pd.to_numeric(cat_target.cat_target_margin)

### 4.7 Merge All Data Sources


In [16]:
final_data = selected_products.merge(
tiers_selection[['warehouse_id','product_id','packing_unit_id','tier_1_qty','tier_2_qty','median_qty','tier_1_increase_pct','tier_2_increase_pct']],
on= ['warehouse_id','product_id','packing_unit_id']
)
final_data=final_data[['warehouse_id','product_id','packing_unit_id','sku','brand', 'cat','packing_unit_price','basic_unit_count','tier_1_qty','tier_2_qty','median_qty','tier_1_increase_pct','tier_2_increase_pct','final_rank']]
final_data=final_data.merge(sku_info[['product_id','wac_p']],on='product_id')
final_data['wac_p'] = (final_data['wac_p']*final_data['basic_unit_count']).round(2)

final_data = final_data.merge(marketplace,on=['product_id','warehouse_id'],how='left')
final_data = final_data.drop(columns = 'region')
final_data = final_data.merge(bensoliman[['product_id','ben_soliman_price']],on=['product_id'],how='left')
final_data = final_data.merge(scrapped_prices,on=['product_id','warehouse_id'],how='left')
final_data = final_data.drop(columns = 'region')

final_data = final_data.merge(warehouse_region,on=['warehouse_id'])
final_data = final_data.merge(stats,on=['product_id','region'],how='left')
final_data = final_data.merge(brand_cat_target,on=['brand','cat'],how='left')
final_data = final_data.merge(cat_target,on=['cat'],how='left')
final_data['Target_margin'] = final_data['target_bm'].fillna(final_data['cat_target_margin'])

## 5. Price Tier Calculation

Calculate tier 1 and tier 2 prices with constraints:
- **Max discount**: 5% from current price
- **Min discount**: 0.35% from current price  
- **Ratio bounds**: discount-to-quantity ratio between 1.3 and 3.5
- **Price ordering**: WAC < Tier 2 < Tier 1 < Current Price


### 5.1 Price Calculation Functions

The `calculate_tier_prices` function uses multiple strategies:
1. **Market prices strategy** - Use competitive pricing data if available
2. **Margin range strategy** - Calculate from margin boundaries if no market data
3. **Ratio adjustment** - Adjust tier_2 price to meet discount-to-quantity ratio bounds


In [17]:
import pandas as pd
import numpy as np

def calculate_tier_prices(row, max_discount_pct=5.0, min_discount_pct=0.35, min_ratio=1.3, max_ratio=3.5):
    """
    Calculate tier 1 and tier 2 prices for a single row.
    
    Parameters:
    - max_discount_pct: Maximum allowed discount from current price (default: 5%)
    - min_discount_pct: Minimum required discount from current price (default: 0.35%)
    - min_ratio: Minimum discount-to-quantity ratio (default: 1.3)
    - max_ratio: Maximum discount-to-quantity ratio (default: 3.5)
    
    Constraints:
    - Tier prices must not go below price calculated with 0.3 * target_margin
    - Ensure: WAC < Tier 2 < Tier 1 < Current Price
    - Ensure: BOTH tiers must be valid or BOTH are None
    - Ensure: discount_qty_ratio = (tier_2_discount/tier_1_discount) / (tier_2_qty/tier_1_qty) is between min_ratio and max_ratio
    """
    
    current_price = row['packing_unit_price']
    wac = row['wac_p']
    
    # Get basic_unit_count for converting market prices
    basic_unit_count = row.get('basic_unit_count', 1)
    if pd.isna(basic_unit_count) or basic_unit_count <= 0:
        basic_unit_count = 1
    
    # Validation
    if pd.isna(current_price) or current_price <= 0:
        return pd.Series({'tier_1_price': np.nan, 'tier_2_price': np.nan, 'price_source': 'invalid_current_price'})
    
    if pd.isna(wac) or wac <= 0:
        return pd.Series({'tier_1_price': np.nan, 'tier_2_price': np.nan, 'price_source': 'invalid_wac'})
    
    if current_price <= wac:
        return pd.Series({'tier_1_price': np.nan, 'tier_2_price': np.nan, 'price_source': 'current_price_below_wac'})
    
    # Calculate discount bounds
    max_discount_price = current_price * (1 - max_discount_pct / 100)  # Minimum allowed price
    min_discount_price = current_price * (1 - min_discount_pct / 100)  # Maximum allowed price
    
    # Calculate absolute minimum price based on target_margin
    # Price must maintain at least 30% of target margin
    absolute_min_price = wac  # Default to WAC if no target_margin
    
    if 'target_margin' in row.index and pd.notna(row['target_margin']) and 0 < row['target_margin'] < 1:
        target_margin = row['target_margin']
        # Minimum margin is 30% of target margin
        min_margin = target_margin * 0.3
        # Calculate minimum price: price = wac / (1 - min_margin)
        absolute_min_price = wac / (1 - min_margin)
    else:
        # Fallback: use wac_cushion_pct
        wac_cushion_pct = 0.25
        absolute_min_price = wac / (1 - (wac_cushion_pct / 100))
    
    # Market price columns (these are per basic unit)
    market_cols = [
        'final_mod_price', 'median_scrapped', 'final_max_price', 
        'ben_soliman_price', 'max_scrapped', 'final_true_max',
        'final_min_price', 'min_scrapped', 'final_true_min'
    ]
    
    # Extract valid market prices (multiply by basic_unit_count, above absolute_min_price, within discount bounds)
    valid_market_prices = []
    for col in market_cols:
        if col in row.index and pd.notna(row[col]) and row[col] > 0:
            # Convert basic unit price to packing unit price
            packing_price = row[col] * basic_unit_count
            
            # Must be: above absolute_min_price AND within discount bounds
            if absolute_min_price < packing_price and max_discount_price <= packing_price <= min_discount_price:
                valid_market_prices.append(packing_price)
    
    # Remove duplicates and sort descending
    valid_market_prices = sorted(list(set(valid_market_prices)), reverse=True)
    
    tier_1 = None
    tier_2 = None
    source = ''
    
    min_gap_pct = 0.25
    
    # Strategy 1: Use market prices
    if len(valid_market_prices) >= 3:
        # Select from available prices
        tier_1 = valid_market_prices[0]  # Highest price
        
        # Find tier 2 with minimum gap
        for price in valid_market_prices[1:]:
            if price < tier_1 * (1 - min_gap_pct / 100):
                tier_2 = price
                break
        
        # If no suitable tier 2 found, take second highest
        if tier_2 is None and len(valid_market_prices) > 1:
            tier_2 = valid_market_prices[1]
        
        if tier_1 is not None and tier_2 is not None:
            source = 'market_prices'
    
    elif len(valid_market_prices) == 2:
        tier_1 = valid_market_prices[0]
        tier_2 = valid_market_prices[1]
        source = 'market_prices'
    
    elif len(valid_market_prices) == 1:
        # Only one market price - use margin range for the other
        market_price = valid_market_prices[0]
        
        # Calculate which tier this should be based on its position
        price_position = (market_price - max_discount_price) / (min_discount_price - max_discount_price)
        
        # If in upper half (>0.5), use as tier 1 and calculate tier 2
        # If in lower half (<=0.5), use as tier 2 and calculate tier 1
        if price_position > 0.5:
            tier_1 = market_price
            tier_2 = calculate_from_margin_range(row, wac, current_price, tier_1, tier=2, 
                                                 max_discount_price=max_discount_price,
                                                 min_discount_price=min_discount_price,
                                                 absolute_min_price=absolute_min_price)
            if tier_2 is not None:
                source = 'market_tier1_margin_tier2'
        else:
            tier_2 = market_price
            tier_1 = calculate_from_margin_range(row, wac, current_price, tier_2, tier=1,
                                                 max_discount_price=max_discount_price,
                                                 min_discount_price=min_discount_price,
                                                 absolute_min_price=absolute_min_price)
            if tier_1 is not None:
                source = 'margin_tier1_market_tier2'
    
    # Strategy 2: No market prices - use margin range method
    if tier_1 is None or tier_2 is None:
        tier_1, tier_2 = calculate_both_from_margin_range(row, wac, current_price,
                                                          max_discount_price=max_discount_price,
                                                          min_discount_price=min_discount_price,
                                                          absolute_min_price=absolute_min_price)
        if tier_1 is not None and tier_2 is not None:
            source = 'margin_range_based'
    
    # CRITICAL: Final validation - BOTH must be valid or BOTH are None
    if tier_1 is not None and tier_2 is not None:
        # Ensure correct ordering
        if tier_2 >= tier_1:
            tier_1, tier_2 = max(tier_1, tier_2), min(tier_1, tier_2)
        
        # Apply discount bounds
        tier_1 = max(tier_1, max_discount_price)
        tier_1 = min(tier_1, min_discount_price)
        tier_2 = max(tier_2, max_discount_price)
        tier_2 = min(tier_2, min_discount_price)
        
        # Check if both above absolute minimum price
        if tier_1 <= absolute_min_price or tier_2 <= absolute_min_price:
            tier_1 = None
            tier_2 = None
            source = 'prices_below_minimum_margin'
        else:
            # Ensure minimum gap between tiers
            if tier_2 > tier_1 * (1 - min_gap_pct / 100):
                tier_2 = tier_1 * (1 - min_gap_pct / 100)
                if tier_2 <= absolute_min_price:
                    tier_1 = None
                    tier_2 = None
                    source = 'insufficient_gap_between_tiers'
            
            # Final check: both still valid?
            if tier_1 is not None and tier_2 is not None:
                if not (wac < tier_2 < tier_1 < current_price):
                    tier_1 = None
                    tier_2 = None
                    source = 'invalid_tier_ordering'
                elif not (max_discount_price <= tier_2 and tier_1 <= min_discount_price):
                    tier_1 = None
                    tier_2 = None
                    source = 'tiers_outside_discount_bounds'
                else:
                    tier_1 = round(tier_1, 2)
                    tier_2 = round(tier_2, 2)
                    
                    # Validate and adjust discount-to-quantity ratio
                    tier_1_qty = row.get('tier_1_qty', None)
                    tier_2_qty = row.get('tier_2_qty', None)
                    
                    if tier_1_qty is not None and tier_2_qty is not None and tier_1_qty > 0:
                        tier_1_discount = current_price - tier_1
                        tier_2_discount = current_price - tier_2
                        
                        if tier_1_discount > 0:
                            diff_quantity = tier_2_qty / tier_1_qty
                            diff_discount = tier_2_discount / tier_1_discount
                            
                            if diff_quantity > 0:
                                discount_qty_ratio = diff_discount / diff_quantity
                                
                                # Adjust tier_2_price if ratio is outside bounds
                                if discount_qty_ratio < min_ratio:
                                    # Ratio too low - need more discount at tier 2
                                    # tier_2 = current_price - (target_ratio * diff_quantity * tier_1_discount)
                                    target_tier_2_discount = min_ratio * diff_quantity * tier_1_discount
                                    adjusted_tier_2 = current_price - target_tier_2_discount
                                    
                                    # Ensure adjusted price is still valid (above WAC and absolute_min_price)
                                    if adjusted_tier_2 > wac and adjusted_tier_2 > absolute_min_price and adjusted_tier_2 < tier_1:
                                        tier_2 = round(adjusted_tier_2, 2)
                                        source = source + '_ratio_adjusted_up'
                                    else:
                                        tier_1 = None
                                        tier_2 = None
                                        source = f'cannot_adjust_ratio_{discount_qty_ratio:.2f}_min_bound'
                                
                                elif discount_qty_ratio > max_ratio:
                                    # Ratio too high - need less discount at tier 2
                                    # tier_2 = current_price - (target_ratio * diff_quantity * tier_1_discount)
                                    target_tier_2_discount = max_ratio * diff_quantity * tier_1_discount
                                    adjusted_tier_2 = current_price - target_tier_2_discount
                                    
                                    # Ensure adjusted price is still valid (below tier_1 and above WAC)
                                    if adjusted_tier_2 > wac and adjusted_tier_2 > absolute_min_price and adjusted_tier_2 < tier_1:
                                        tier_2 = round(adjusted_tier_2, 2)
                                        source = source + '_ratio_adjusted_down'
                                    else:
                                        tier_1 = None
                                        tier_2 = None
                                        source = f'cannot_adjust_ratio_{discount_qty_ratio:.2f}_max_bound'
    
    # FINAL CHECK: If only one tier exists, invalidate both
    if (tier_1 is None and tier_2 is not None) or (tier_1 is not None and tier_2 is None):
        tier_1 = None
        tier_2 = None
        source = 'incomplete_tier_pair'
    
    # If both are None and no source set, mark it
    if tier_1 is None and tier_2 is None and source == '':
        source = 'no_valid_prices'
    
    return pd.Series({
        'tier_1_price': tier_1,
        'tier_2_price': tier_2,
        'price_source': source
    })


def calculate_both_from_margin_range(row, wac, current_price, max_discount_price, min_discount_price, absolute_min_price):
    """
    Calculate both tier prices using margin range from minimum of (min_boundary, optimal_bm) to current margin.
    Returns (tier_1_price, tier_2_price) or (None, None)
    """
    
    # Calculate current margin: margin = (price - wac) / price
    current_margin = (current_price - wac) / current_price
    
    # Get min_boundary margin
    min_boundary_margin = None
    if 'min_boundary' in row.index and pd.notna(row['min_boundary']) and 0 < row['min_boundary'] < 1:
        min_boundary_margin = row['min_boundary']
    
    # Get optimal_bm margin
    optimal_margin = None
    if 'optimal_bm' in row.index and pd.notna(row['optimal_bm']) and 0 < row['optimal_bm'] < 1:
        optimal_margin = row['optimal_bm']
    
    # Determine starting margin: minimum of (min_boundary, optimal_bm)
    start_margin = None
    
    if min_boundary_margin is not None and optimal_margin is not None:
        start_margin = min(min_boundary_margin, optimal_margin)
    elif min_boundary_margin is not None:
        start_margin = min_boundary_margin
    elif optimal_margin is not None:
        start_margin = optimal_margin
    else:
        # Fallback: use 50% of current margin
        start_margin = current_margin * 0.85
    
    # Ensure start_margin is less than current margin
    if start_margin >= current_margin:
        start_margin = current_margin * 0.85
    
    # Generate margin points in the range (10 points)
    num_points = 10
    margin_range = np.linspace(start_margin, current_margin, num_points)
    
    # Calculate prices from these margins: price = wac / (1 - margin)
    price_candidates = []
    for margin in margin_range:
        if 0 < margin < 1:
            price = wac / (1 - margin)
            # Only keep prices within discount bounds and above absolute_min_price
            if absolute_min_price < price and max_discount_price <= price <= min_discount_price:
                price_candidates.append(price)
    
    if len(price_candidates) < 2:
        return None, None
    
    # Sort prices descending
    price_candidates = sorted(price_candidates, reverse=True)
    
    # Select Tier 1: closer to the top (less discount)
    # Select Tier 2: further down (more discount)
    tier_1_idx = int(len(price_candidates) * 0.25)  # 25% from top
    tier_2_idx = int(len(price_candidates) * 0.65)  # 65% from top
    
    # Ensure valid indices
    tier_1_idx = max(0, min(tier_1_idx, len(price_candidates) - 2))
    tier_2_idx = max(tier_1_idx + 1, min(tier_2_idx, len(price_candidates) - 1))
    
    tier_1 = price_candidates[tier_1_idx]
    tier_2 = price_candidates[tier_2_idx]
    
    # Ensure meaningful gap (at least 0.5%)
    min_gap_pct = 0.25
    if tier_2 > tier_1 * (1 - min_gap_pct / 100):
        # Try to find better tier_2
        for i in range(tier_2_idx + 1, len(price_candidates)):
            if price_candidates[i] < tier_1 * (1 - min_gap_pct / 100):
                tier_2 = price_candidates[i]
                break
    
    # Final validation
    if tier_2 >= tier_1 or tier_1 <= absolute_min_price or tier_2 <= absolute_min_price:
        return None, None
    
    return tier_1, tier_2


def calculate_from_margin_range(row, wac, current_price, other_tier_price, tier, 
                                max_discount_price, min_discount_price, absolute_min_price):
    """
    Calculate single tier price using margin range.
    Used when one tier is from market and we need to calculate the other.
    """
    
    # Calculate current margin
    current_margin = (current_price - wac) / current_price
    
    # Get min_boundary margin
    min_boundary_margin = None
    if 'min_boundary' in row.index and pd.notna(row['min_boundary']) and 0 < row['min_boundary'] < 1:
        min_boundary_margin = row['min_boundary']
    
    # Get optimal_bm margin
    optimal_margin = None
    if 'optimal_bm' in row.index and pd.notna(row['optimal_bm']) and 0 < row['optimal_bm'] < 1:
        optimal_margin = row['optimal_bm']
    
    # Determine starting margin: minimum of (min_boundary, optimal_bm)
    start_margin = None
    
    if min_boundary_margin is not None and optimal_margin is not None:
        start_margin = min(min_boundary_margin, optimal_margin)
    elif min_boundary_margin is not None:
        start_margin = min_boundary_margin
    elif optimal_margin is not None:
        start_margin = optimal_margin
    else:
        start_margin = current_margin * 0.5
    
    # Ensure start_margin is less than current margin
    if start_margin >= current_margin:
        start_margin = current_margin * 0.7
    
    # Generate margin range (10 points)
    num_points = 10
    margin_range = np.linspace(start_margin, current_margin, num_points)
    
    # Calculate prices
    price_candidates = []
    for margin in margin_range:
        if 0 < margin < 1:
            price = wac / (1 - margin)
            if absolute_min_price < price and max_discount_price <= price <= min_discount_price:
                price_candidates.append(price)
    
    if len(price_candidates) == 0:
        return None
    
    # Sort prices descending
    price_candidates = sorted(price_candidates, reverse=True)
    
    min_gap_pct = 0.5
    
    if tier == 1:
        # Need tier 1 (higher price), we have tier 2 (lower price)
        # Find prices above tier 2 with proper gap
        target_candidates = [p for p in price_candidates 
                           if p > other_tier_price * (1 + min_gap_pct / 100)]
        if target_candidates:
            # Take from upper portion (25% position)
            idx = int(len(target_candidates) * 0.25)
            return target_candidates[idx]
        return None
    
    else:
        # Need tier 2 (lower price), we have tier 1 (higher price)
        # Find prices below tier 1 with proper gap
        target_candidates = [p for p in price_candidates 
                           if p < other_tier_price * (1 - min_gap_pct / 100)]
        if target_candidates:
            # Take from lower portion (65% position)
            idx = int(len(target_candidates) * 0.65)
            idx = min(idx, len(target_candidates) - 1)
            return target_candidates[idx]
        return None


### 5.2 Apply Price Calculations


In [18]:
df = final_data.copy()
final_data.columns = final_data.columns.str.lower()

print(f"Processing {len(final_data)} SKUs...")

# QUESTIONS FOR USER - Set these parameters:
MAX_DISCOUNT_PCT = 5.0      # Maximum discount allowed
MIN_DISCOUNT_PCT = 0.35     # Minimum discount required
MIN_RATIO = 1.3             # Minimum discount-to-quantity ratio
MAX_RATIO = 3.5             # Maximum discount-to-quantity ratio

# Apply function to each row with discount bounds and ratio constraints
result = final_data.apply(lambda row: calculate_tier_prices(row, 
                                                    max_discount_pct=MAX_DISCOUNT_PCT,
                                                    min_discount_pct=MIN_DISCOUNT_PCT,
                                                    min_ratio=MIN_RATIO,
                                                    max_ratio=MAX_RATIO), 
                 axis=1)

# Merge results back to dataframe
final_data = pd.concat([final_data, result], axis=1)

# Show how many were adjusted vs couldn't be adjusted
ratio_adjusted_up = final_data['price_source'].str.contains('ratio_adjusted_up', na=False).sum()
ratio_adjusted_down = final_data['price_source'].str.contains('ratio_adjusted_down', na=False).sum()
cannot_adjust = final_data['price_source'].str.contains('cannot_adjust_ratio', na=False).sum()
print(f"Ratio adjusted up (was below {MIN_RATIO}): {ratio_adjusted_up} SKUs")
print(f"Ratio adjusted down (was above {MAX_RATIO}): {ratio_adjusted_down} SKUs")
print(f"Could not adjust (constraints violated): {cannot_adjust} SKUs")

final_data = final_data[(~final_data['tier_1_price'].isna())&(~final_data['tier_2_price'].isna())]
print(f"Final SKUs with valid tier prices: {len(final_data)}")

Processing 2400 SKUs...
Ratio adjusted up (was below 1.3): 1577 SKUs
Ratio adjusted down (was above 3.5): 29 SKUs
Could not adjust (constraints violated): 83 SKUs
Final SKUs with valid tier prices: 1974


## 6. Wholesale Pricing

Calculate wholesale prices based on:
- Vehicle capacity (quarter truck)
- Rank-based margin tiers (20%, 25%, 40%, 60% of target margin)
- Must be below tier_2_price


In [19]:
final_data['delivery_fees'] =  DELIVERY_FEE_OTHER
final_data.loc[final_data['region'].isin(['Cairo','Giza']),'delivery_fees'] = DELIVERY_FEE_CAIRO_GIZA
query_data = final_data[['warehouse_id', 'product_id', 'packing_unit_id','delivery_fees']].values.tolist()
query_info = ','.join([f"({int(wh_id)}, {int(prod_id)}, {int(pu_id)}, {int(delivery_fees)})" for wh_id, prod_id, pu_id,delivery_fees in query_data])

In [20]:
query = f'''
with chosen_products as (
select *
from (
values 
{query_info}
)x(warehouse_id,product_id,packing_unit_id,delivery_fees)

),
vec as (
select  vt.id as vehicle_id,name_en as vehicle_name,vc.weight as vehicle_weight,vc.cbm as vehicle_cbm,900 as vehicle_cost
from VEHICLE_TYPES  vt 
join  RETOOL.VEHICLE_CAPACITIES vc on vc.vehicle_id = vt.id
where vehicle_id = 1
),
selected_products as (
select x.*,	(long*width*height)/1000000 AS cbm,weight/1000 AS weight,
from chosen_products x
join packing_unit_products on x.product_id = packing_unit_products.product_id and packing_unit_products.packing_unit_id = x.packing_unit_id
),
main_cte as (
select warehouse_id,product_id,packing_unit_id,delivery_fees,
ceil(least(quart_dababa_wht,quart_dababa_cbm)) as quart_dababa,
vehicle_cost
from (
select * ,
((vehicle_weight*0.9)/4)/weight as quart_dababa_wht , 
((vehicle_cbm*0.9)/4)/cbm as quart_dababa_cbm  
from (
select selected_products.*, vehicle_weight,vehicle_cbm,vehicle_cost
from selected_products,vec
)
)
)
select mc.*, f.wac_p , 
(f.wac_p*quart_dababa)+(((vehicle_cost-(delivery_fees*4))*0.9)/4) as quart_cost,
quart_cost/quart_dababa as unit_cost


from main_cte mc 
join finance.all_cogs f on f.product_id = mc.product_id and CURRENT_TIMEstamp between from_date and to_date 

'''
ws_data  =snowflake_query("Egypt",query)
ws_data.columns = ws_data.columns.str.lower()
for col in ws_data.columns:
    ws_data[col] = pd.to_numeric(ws_data[col], errors='ignore') 
ws_data=ws_data[['warehouse_id', 'product_id', 'packing_unit_id','quart_dababa','unit_cost']]
ws_data.columns = ['warehouse_id', 'product_id', 'packing_unit_id','WS_tier','WS_wac']

In [21]:
final_data = final_data.merge(ws_data,on=['warehouse_id', 'product_id', 'packing_unit_id'],how='left')
final_data['WS_wac'] = final_data['WS_wac']*final_data['basic_unit_count']

In [22]:
def wholesales_margin(x):
    wac = x['WS_wac']
    target_margin =x['target_margin']
    min_margin = min(x['optimal_bm'],x['min_boundary'])
    tier_2_price = x['tier_2_price']
    final_rank = x['final_rank']
    tier = 0 
    new_price = 0
    price = wac/(1-0.01)
    if final_rank <= 0.25*133:
        tier = 1 
    elif final_rank > 0.25*133 and final_rank <= 0.5*133:  
        tier = 2
    elif final_rank > 0.5*133 and final_rank <= 0.75*133:  
        tier = 3
    else:
        tier = 4 
        
    if  tier == 1 :
            price= wac/ (1-np.minimum(np.maximum(((0.2)*x['target_margin']),0.01),x['target_margin']))
    elif tier == 2 :  
        price= wac / (1-np.minimum(np.maximum(((0.25)*x['target_margin']),0.015),x['target_margin']))
    elif tier == 3 :  
        price= wac / (1-np.minimum(np.maximum(((0.4)*x['target_margin']),0.015),x['target_margin']))    
    else:
        price = wac / (1-np.minimum(np.maximum(((0.6)*x['target_margin']),0.015),x['target_margin']))
    if price >= tier_2_price:
        new_price = (wac+tier_2_price)/2
    return np.maximum(new_price,wac/(1-0.01))

In [23]:
final_data['WS_price'] = final_data.apply(wholesales_margin,axis=1)
final_data['valid'] = final_data['WS_price']<final_data['tier_2_price']
final_data.loc[final_data['valid']==False,'WS_price']=np.nan

In [24]:
final_data['new_rank'] = final_data.groupby(['warehouse_id'])['final_rank'].rank(method='dense', ascending=True)
final_data=final_data[final_data['new_rank']<=133]

In [25]:
# =============================================================================
# CALCULATE ADDITIONAL METRICS
# =============================================================================

# --- Stretch Percentages (how much retailers need to increase to reach each tier) ---
# Already included from tiers_selection: tier_1_increase_pct, tier_2_increase_pct
# These show: (tier_qty - median_qty) / median_qty * 100

# Rename for clarity
final_data['stretch_to_tier_1_pct'] = final_data['tier_1_increase_pct']
final_data['stretch_to_tier_2_pct'] = final_data['tier_2_increase_pct']

# --- Margins for each price tier ---
# Margin = (price - wac) / price
final_data['tier_1_margin'] = ((final_data['tier_1_price'] - final_data['wac_p']) / final_data['tier_1_price']).round(4)
final_data['tier_2_margin'] = ((final_data['tier_2_price'] - final_data['wac_p']) / final_data['tier_2_price']).round(4)
final_data['WS_margin'] = ((final_data['WS_price'] - final_data['wac_p']) / final_data['wac_p']).round(4)
final_data['current_margin'] = ((final_data['packing_unit_price'] - final_data['wac_p']) / final_data['packing_unit_price']).round(4)

# --- Discount calculations ---
# Absolute discounts (price reduction from current price)
final_data['discount_1'] = (final_data['packing_unit_price'] - final_data['tier_1_price']).round(2)
final_data['discount_2'] = (final_data['packing_unit_price'] - final_data['tier_2_price']).round(2)

# Discount percentages
final_data['discount_1_pct'] = ((final_data['discount_1'] / final_data['packing_unit_price']) * 100).round(2)
final_data['discount_2_pct'] = ((final_data['discount_2'] / final_data['packing_unit_price']) * 100).round(2)

# --- Quantity and Discount Ratios ---
# Quantity ratio (tier_2_qty / tier_1_qty)
final_data['qty_ratio'] = (final_data['tier_2_qty'] / final_data['tier_1_qty']).round(2)

# Discount ratio (discount_2 / discount_1)
final_data['discount_ratio'] = (final_data['discount_2'] / final_data['discount_1']).round(2)

# Elasticity ratio = discount_ratio / qty_ratio
# This shows how much extra discount per unit of quantity increase
final_data['elasticity_ratio'] = (final_data['discount_ratio'] / final_data['qty_ratio']).round(2)

print("=== METRICS SUMMARY ===")
print(f"\nStretch Analysis (how much retailers need to increase orders):")
print(f"  Average stretch to Tier 1: {final_data['stretch_to_tier_1_pct'].mean():.1f}%")
print(f"  Average stretch to Tier 2: {final_data['stretch_to_tier_2_pct'].mean():.1f}%")

print(f"\nMargin Analysis:")
print(f"  Current margin:  {final_data['current_margin'].mean()*100:.2f}%")
print(f"  Tier 1 margin:   {final_data['tier_1_margin'].mean()*100:.2f}%")
print(f"  Tier 2 margin:   {final_data['tier_2_margin'].mean()*100:.2f}%")
print(f"  WS margin:       {final_data['WS_margin'].mean()*100:.2f}%")

print(f"\nDiscount Analysis:")
print(f"  Average Tier 1 discount: {final_data['discount_1_pct'].mean():.2f}%")
print(f"  Average Tier 2 discount: {final_data['discount_2_pct'].mean():.2f}%")

print(f"\nElasticity Analysis (discount increase vs quantity increase):")
print(f"  Average qty ratio (T2/T1): {final_data['qty_ratio'].mean():.2f}x")
print(f"  Average discount ratio (D2/D1): {final_data['discount_ratio'].mean():.2f}x")
print(f"  Average elasticity ratio: {final_data['elasticity_ratio'].mean():.2f}")


=== METRICS SUMMARY ===

Stretch Analysis (how much retailers need to increase orders):
  Average stretch to Tier 1: 255.5%
  Average stretch to Tier 2: 519.1%

Margin Analysis:
  Current margin:  5.74%
  Tier 1 margin:   5.03%
  Tier 2 margin:   3.98%
  WS margin:       2.31%

Discount Analysis:
  Average Tier 1 discount: 0.75%
  Average Tier 2 discount: 1.83%

Elasticity Analysis (discount increase vs quantity increase):
  Average qty ratio (T2/T1): 1.74x
  Average discount ratio (D2/D1): 2.48x
  Average elasticity ratio: 1.43


## 7. Final Ranking & Export


In [26]:
# Save to Excel with all metrics
output_file = 'QD_upload.xlsx'
final_data.to_excel(output_file, index=False)

print(f"\n=== EXPORT COMPLETE ===")
print(f"Saved {len(final_data)} SKUs to '{output_file}'")
print(f"\nNew columns added:")
print("  - stretch_to_tier_1_pct: % increase from median qty to tier 1")
print("  - stretch_to_tier_2_pct: % increase from median qty to tier 2")
print("  - tier_1_margin, tier_2_margin, WS_margin: margins for each price")
print("  - current_margin: margin at current price")
print("  - discount_1, discount_2: absolute discount amounts")
print("  - discount_1_pct, discount_2_pct: discount percentages")
print("  - qty_ratio: tier_2_qty / tier_1_qty")
print("  - discount_ratio: discount_2 / discount_1")
print("  - elasticity_ratio: discount_ratio / qty_ratio")


=== EXPORT COMPLETE ===
Saved 1596 SKUs to 'QD_upload.xlsx'

New columns added:
  - stretch_to_tier_1_pct: % increase from median qty to tier 1
  - stretch_to_tier_2_pct: % increase from median qty to tier 2
  - tier_1_margin, tier_2_margin, WS_margin: margins for each price
  - current_margin: margin at current price
  - discount_1, discount_2: absolute discount amounts
  - discount_1_pct, discount_2_pct: discount percentages
  - qty_ratio: tier_2_qty / tier_1_qty
  - discount_ratio: discount_2 / discount_1
  - elasticity_ratio: discount_ratio / qty_ratio
