In [1]:
%%capture
# =============================================================================
# PACKAGE INSTALLATION
# =============================================================================
# %%capture (which must stay on the first line of the cell) hides everything
# this cell prints, including pip errors. Remove it temporarily when debugging
# an installation problem.
# NOTE(review): only some packages are version-pinned (snowflake-connector-python,
# keyring, sqlalchemy, gspread, pandas); the unpinned ones may resolve to
# different versions on each fresh environment.
!pip install --upgrade pip

# Database Connectivity
!pip install psycopg2-binary snowflake-connector-python==3.15.0 snowflake-sqlalchemy

# Authentication & Cloud
!pip install keyring==23.11.0 sqlalchemy==1.4.46 requests boto3
!pip install oauth2client gspread==5.9.0 gspread_dataframe google.cloud

# Data Processing
!pip install pandas==2.2.1 numpy polars openpyxl xlsxwriter

# Utilities
!pip install tqdm db-dtypes pytz import-ipynb

In [2]:
# =============================================================================
# IMPORTS & ENVIRONMENT SETUP
# =============================================================================
import os
import json
import warnings
import importlib
from datetime import datetime, date, timedelta

import pandas as pd
import numpy as np
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import snowflake.connector

import setup_environment_2
import import_ipynb

# Suppress warnings
# NOTE(review): called without a category, this silences every warning for the
# rest of the session, including pandas deprecation warnings.
warnings.filterwarnings("ignore")

# Initialize environment
# Reload first so edits to setup_environment_2 are picked up when this cell is
# re-run in a live kernel.
importlib.reload(setup_environment_2)
# presumably populates os.environ with the SNOWFLAKE_* credentials that
# query_snowflake reads — TODO confirm against setup_environment_2.
setup_environment_2.initialize_env()

  warn_incompatible_dep(


/home/ec2-user/.Renviron
/home/ec2-user/service_account_key.json


In [4]:
# =============================================================================
# CONFIGURATION
# =============================================================================

# Pricing status mode. Any value containing "min" (e.g. "min_market") makes the
# tier-assignment cell lower every tier by 1 (floored at 0); "" leaves tiers as is.
STATUS = ""  # alternative: "min_market"

# Google Sheets API scope
GSHEETS_SCOPE = [
    "https://spreadsheets.google.com/feeds",
    "https://www.googleapis.com/auth/spreadsheets",
    "https://www.googleapis.com/auth/drive.file",
    "https://www.googleapis.com/auth/drive"
]

# Brand/Category pricing rules, consumed by the tier-assignment cell:
#   BELOW_MARKET_BRANDS  -> tier forced to 0
#   MIN_PRICE_BRANDS     -> tier forced to 1
#   AVG_PRICE_BRANDS     -> tier forced to 3
#   MAX_PRICE_BRANDS     -> tier forced to 5
#   MIN_PRICE_CATEGORIES -> tier lowered by 1 (never below 1)
BELOW_MARKET_BRANDS = ['شويبس', 'كوكا كولا']
MIN_PRICE_BRANDS = ['فاميليا', 'مولبد', 'مولفيكس', 'اوكسي', 'جوي', 'ريفولي', 'البوادي', 'هارفست فوودز', 'هاينز', 'بيبسي']
AVG_PRICE_BRANDS = ['بخيره', 'جود كير']
MAX_PRICE_BRANDS = ['فيوري']
MIN_PRICE_CATEGORIES = ['تونة و سمك']

# Warehouse mapping: one row per warehouse with its region and pricing cohort.
WAREHOUSE_CONFIG = pd.DataFrame([
    ('Cairo', 'El-Marg', 38, 700),
    ('Cairo', 'Mostorod', 1, 700),
    ('Giza', 'Barageel', 236, 701),
    ('Delta West', 'El-Mahala', 337, 703),
    ('Delta West', 'Tanta', 8, 703),
    ('Delta East', 'Mansoura FC', 339, 704),
    ('Delta East', 'Sharqya', 170, 704),
    ('Upper Egypt', 'Assiut FC', 501, 1124),
    ('Upper Egypt', 'Bani sweif', 401, 1126),
    ('Upper Egypt', 'Menya Samalot', 703, 1123),
    ('Upper Egypt', 'Sohag', 632, 1125),
    ('Alexandria', 'Khorshed Alex', 797, 702),
    ('Giza', 'Sakkarah', 962, 701)
], columns=['region', 'warehouse', 'warehouse_id', 'cohort_id'])

# Region to cohort mapping, derived from WAREHOUSE_CONFIG so the two tables
# cannot drift apart. Dropping duplicate (region, cohort_id) pairs keeps the
# first occurrence of each, which yields the 9 distinct pairs in warehouse order.
REGION_COHORT_MAP = (
    WAREHOUSE_CONFIG[['region', 'cohort_id']]
    .drop_duplicates()
    .reset_index(drop=True)
)

# Products to exclude from TGTG processing: (product_id, warehouse_id) pairs.
# The list is currently empty; 'remove' is a constant flag column set to 1.
TGTG_EXCLUSIONS = pd.DataFrame([
], columns=['product_id', 'warehouse_id'])
TGTG_EXCLUSIONS['remove'] = 1

In [5]:
# =============================================================================
# HELPER FUNCTIONS
# =============================================================================

def query_snowflake(query, columns=None):
    """Execute a query on Snowflake and return the result as a DataFrame.

    Connection settings are read from the SNOWFLAKE_USERNAME, SNOWFLAKE_ACCOUNT,
    SNOWFLAKE_PASSWORD and SNOWFLAKE_DATABASE environment variables, and the
    query runs on the COMPUTE_WH warehouse.

    Args:
        query: SQL text to execute.
        columns: optional list of column labels for the returned frame; when
            omitted the frame gets default integer column labels.

    Returns:
        A DataFrame with the fetched rows. If anything fails after the
        connection is opened, the error is printed and an EMPTY DataFrame
        (no rows, no columns) is returned, so callers should check for that.

    Raises:
        KeyError: if one of the environment variables is missing.
        Any exception raised by snowflake.connector.connect itself.
    """
    con = snowflake.connector.connect(
        user=os.environ["SNOWFLAKE_USERNAME"],
        account=os.environ["SNOWFLAKE_ACCOUNT"],
        password=os.environ["SNOWFLAKE_PASSWORD"],
        database=os.environ["SNOWFLAKE_DATABASE"]
    )
    # Bind the name before the try block: if con.cursor() itself fails, the
    # finally clause must not hit an unbound variable (which would mask the
    # original error and skip con.close()).
    cur = None
    try:
        cur = con.cursor()
        cur.execute("USE WAREHOUSE COMPUTE_WH")
        cur.execute(query)
        data = cur.fetchall()
        return pd.DataFrame(data, columns=columns) if columns else pd.DataFrame(data)
    except Exception as e:
        print(f"Snowflake Error: {e}")
        return pd.DataFrame()
    finally:
        try:
            if cur is not None:
                cur.close()
        finally:
            # Always release the connection, even if closing the cursor fails.
            con.close()


def get_gsheets_client():
    """Build a gspread client authorized with the service-account key.

    The key is fetched as a JSON string from the "prod/maxab-sheets" secret via
    setup_environment_2.get_secret and is granted the scopes in GSHEETS_SCOPE.
    """
    key_info = json.loads(setup_environment_2.get_secret("prod/maxab-sheets"))
    credentials = ServiceAccountCredentials.from_json_keyfile_dict(key_info, GSHEETS_SCOPE)
    return gspread.authorize(credentials)


def to_numeric_columns(df):
    """Convert every column that can be fully parsed as numbers to a numeric dtype.

    Columns that cannot be converted are left untouched. The frame is modified
    in place and also returned so the call can be chained.

    Args:
        df: DataFrame whose columns should be converted where possible.

    Returns:
        The same DataFrame object that was passed in.
    """
    for col in df.columns:
        # pd.to_numeric(errors='ignore') is deprecated since pandas 2.2; catching
        # the conversion error explicitly gives the same "leave it alone" result.
        try:
            df[col] = pd.to_numeric(df[col])
        except (ValueError, TypeError):
            continue
    return df


def assign_tier(cumulative_contribution):
    """Map a cumulative NMV contribution to a pricing tier from 1 to 5.

    Tier n is the first upper bound in (0.4, 0.6, 0.8, 0.95) that the value does
    not exceed; values above every bound (or not comparable, e.g. NaN) get 5.
    """
    upper_bounds = (0.4, 0.6, 0.8, 0.95)
    matching_tiers = (
        tier
        for tier, bound in enumerate(upper_bounds, start=1)
        if cumulative_contribution <= bound
    )
    return next(matching_tiers, 5)


def price_analysis(row):
    """Summarise the usable market price points of one product.

    A price is usable when it is truthy, not NaN, at least wac_p, and lies in
    the band wac_p / (1 - (m - 2.5*std)) .. wac_p / (1 - (m + 4*std)), where m
    is avg_margin when that is >= 0.01 and target_margin otherwise.

    Returns:
        (min, 25th percentile, median, 75th percentile, max) of the distinct
        usable prices, or five NaNs when no price qualifies.
    """
    cost = row['wac_p']
    if row['avg_margin'] >= 0.01:
        margin = row['avg_margin']
    else:
        margin = row['target_margin']
    spread = row['std']

    # Every competitor / marketplace / scraped price available for the product
    candidates = [
        row['ben_soliman_price'], row['final_min_price'], row['final_mod_price'],
        row['final_max_price'], row['min_scrapped'], row['median_scrapped'], row['max_scrapped']
    ]

    accepted = set()
    for price in candidates:
        # Skip missing / zero prices before touching the band arithmetic
        if not price or pd.isna(price) or price == 0:
            continue
        inside_band = (
            cost / (1 - (margin - 2.5 * spread))
            <= price
            <= cost / (1 - (margin + 4 * spread))
        )
        if inside_band and price >= cost:
            accepted.add(price)

    valid_prices = sorted(accepted)
    if not valid_prices:
        return (np.nan,) * 5

    quartiles = [np.percentile(valid_prices, q) for q in (25, 50, 75)]
    return (np.min(valid_prices), quartiles[0], quartiles[1], quartiles[2], np.max(valid_prices))


def calculate_step_bounds(row):
    """Extend the price ladder one step below its minimum and above its maximum.

    The step is the mean gap between consecutive ladder points (minimum,
    percentile_25/50/75, maximum), counting only gaps whose size relative to
    wac_p is at most 1.2 * std. If no gap qualifies, the step falls back to
    min(2 * std, 0.2 * target_margin). An extended bound is used only when it
    is still >= wac_p; otherwise the original ladder end is returned.

    Returns:
        (new_min, new_max)
    """
    cost = row['wac_p']
    spread = row['std']
    ladder = [row[key] for key in
              ('minimum', 'percentile_25', 'percentile_50', 'percentile_75', 'maximum')]

    # Gaps between neighbouring ladder points that are small enough to trust
    accepted_gaps = []
    for lower, upper in zip(ladder, ladder[1:]):
        gap = upper - lower
        if (gap / cost) <= spread * 1.2:
            accepted_gaps.append(gap)

    if accepted_gaps:
        avg_step = np.mean(accepted_gaps)
    else:
        # NOTE(review): this fallback is expressed in margin/std units while the
        # gaps above are price differences — confirm whether it should be scaled
        # by wac_p.
        avg_step = min(2 * spread, 0.2 * row['target_margin'])

    lowest, highest = ladder[0], ladder[-1]
    stepped_down = lowest - avg_step
    stepped_up = highest + avg_step

    new_min = stepped_down if stepped_down >= cost else lowest
    new_max = stepped_up if stepped_up >= cost else highest
    return new_min, new_max


def convert_sku_id(row):
    """Convert the row's SKU value to an integer product id.

    Thousands separators (",") are stripped before conversion.

    Args:
        row: any object exposing a ``SKU`` attribute (e.g. a DataFrame row).

    Returns:
        The SKU as an int, or the original SKU value unchanged when it cannot
        be parsed as an integer.
    """
    try:
        return int(str(row.SKU).replace(",", ""))
    except (ValueError, TypeError):
        # Only conversion failures are tolerated; anything else (including
        # KeyboardInterrupt) must propagate instead of being swallowed.
        return row.SKU

In [6]:
# =============================================================================
# DATA LOADING - Snowflake Timezone & Google Sheets
# =============================================================================

# Snowflake session timezone: column 1 of the first row returned by SHOW PARAMETERS
timezone_params = query_snowflake("SHOW PARAMETERS LIKE 'TIMEZONE'")
zone_to_use = timezone_params[1].values[0]
print(f"Snowflake timezone: {zone_to_use}")

# Authorized Google Sheets client, reused by the cells below
client = get_gsheets_client()

# Min/max margin per cohort; only rows with min_margin above 0.01 are kept
min_max_sheet = client.open('Demand Based Dynamic Pricing').worksheet('min_max_margin_cohort')
min_max_records = min_max_sheet.get_all_records()
min_max_df = to_numeric_columns(pd.DataFrame(min_max_records))
min_max_df = min_max_df[min_max_df['min_margin'] > 0.01]

# Distinct brand names taking part in the Blue FD campaign
blue_brands_sheet = client.open('Anniversary Campaign 2025 (Final)').worksheet('Suppliers Brands')
blue_brands = pd.DataFrame(blue_brands_sheet.get_all_records())['Brands']
blue_list = blue_brands.drop_duplicates().tolist()

print(f"Loaded {len(min_max_df)} min_max records, {len(blue_list)} blue campaign brands")

Snowflake timezone: America/Los_Angeles
Loaded 8075 min_max records, 126 blue campaign brands


In [None]:
# =============================================================================
# DATA LOADING - Market Main Data Query
# =============================================================================

# Single Snowflake statement that assembles, per (cohort_id, product_id), the
# current MaxAB price together with every external price reference and the
# margin statistics used by the pricing helpers. CTE overview:
#   whs              inline warehouse list (same values as WAREHOUSE_CONFIG in the
#                    configuration cell; the two copies are maintained by hand)
#   full_data        every active product crossed with every whs region
#   MP               marketplace prices divided by BASIC_UNIT_COUNT, min-aggregated
#                    per region/product
#   region_mapping   region -> fallback region pairs
#   final_mp         own-region marketplace price, else a fallback-region price
#   ben_soliman      latest savvy_mapping (bs_price) quote from the last 5 days,
#                    kept only when within 0.7x-1.3x of wac_p
#   scrapped_data    min / median / max of the latest cleaned market prices
#                    (last 5 days, within 0.7x-1.3x of wac_p)
#   local_prices     average customized cohort price per packing unit
#   live_prices      today's DBDP price for the current hour's time slot
#   prices           live price preferred over local price (priority 1 vs 2)
#   maxab_prices     prices restricted to basic_unit_count = 1
#   sales            NMV per cohort/product from the start of the month 120 days
#                    ago through yesterday
#   margin_change    avg product margin and a blended std
#                    (0.6 product + 0.3 brand + 0.1 category)
#   cat_brand_target / cat_target   commercial target margins per cat+brand and
#                    an NMV-weighted target per category
# NOTE(review): the literal is an f-string but contains no replacement fields.
# NOTE(review): final_mp inner-joins region_mapping, so a region with no row in
# that mapping would drop out of final_mp — confirm this is intended.
MARKET_DATA_QUERY = f'''
WITH whs as (SELECT *
             FROM   (values
                            ('Cairo', 'El-Marg', 38,700),
                            ('Cairo', 'Mostorod', 1,700),
                            ('Giza', 'Barageel', 236,701),
                            ('Delta West', 'El-Mahala', 337,703),
                            ('Delta West', 'Tanta', 8,703),
                            ('Delta East', 'Mansoura FC', 339,704),
                            ('Delta East', 'Sharqya', 170,704),
                            ('Upper Egypt', 'Assiut FC', 501,1124),
                            ('Upper Egypt', 'Bani sweif', 401,1126),
                            ('Upper Egypt', 'Menya Samalot', 703,1123),
                            ('Upper Egypt', 'Sohag', 632,1125),
                            ('Alexandria', 'Khorshed Alex', 797,702),
                            ('Giza', 'Sakkarah', 962,701))
                    x(region, wh, warehouse_id,cohort_id)),

full_data as (
    select products.id as product_id, region
    from products, whs 
    where activation = 'true'
),

MP as (
    select region, product_id,
        min(min_price) as min_price, min(max_price) as max_price,
        min(mod_price) as mod_price, min(true_min) as true_min, min(true_max) as true_max
    from (
        select mp.region, mp.product_id, mp.pu_id,
            min_price/BASIC_UNIT_COUNT as min_price,
            max_price/BASIC_UNIT_COUNT as max_price,
            mod_price/BASIC_UNIT_COUNT as mod_price,
            TRUE_MIN_PRICE/BASIC_UNIT_COUNT as true_min,
            TRUE_MAX_PRICE/BASIC_UNIT_COUNT as true_max
        from materialized_views.marketplace_prices mp 
        join packing_unit_products pup on pup.product_id = mp.product_id and pup.packing_unit_id = mp.pu_id
        join finance.all_cogs f on f.product_id = mp.product_id and CURRENT_TIMESTAMP between f.from_date and f.to_date
        where least(min_price, mod_price) between wac_p*0.9 and wac_p*1.3 
    )
    group by all 
),

region_mapping AS (
    SELECT * FROM (VALUES
        ('Delta East', 'Delta West'), ('Delta West', 'Delta East'),
        ('Alexandria', 'Cairo'), ('Alexandria', 'Giza'),
        ('Upper Egypt', 'Cairo'), ('Upper Egypt', 'Giza'),
        ('Cairo', 'Giza'), ('Giza', 'Cairo'),
        ('Delta West', 'Cairo'), ('Delta East', 'Cairo'),
        ('Delta West', 'Giza'), ('Delta East', 'Giza')
    ) AS region_mapping(region, fallback_region)
),

final_mp as (
    select region, product_id,
        min(final_min_price) as final_min_price, min(final_max_price) as final_max_price,
        min(final_mod_price) as final_mod_price, min(final_true_min) as final_true_min,
        min(final_true_max) as final_true_max
    from (
        SELECT distinct w.region, w.product_id,
            COALESCE(m1.min_price, m2.min_price) AS final_min_price,
            COALESCE(m1.max_price, m2.max_price) AS final_max_price,
            COALESCE(m1.mod_price, m2.mod_price) AS final_mod_price,
            COALESCE(m1.true_min, m2.true_min) AS final_true_min,
            COALESCE(m1.true_max, m2.true_max) AS final_true_max
        FROM full_data w
        LEFT JOIN MP m1 ON w.region = m1.region and w.product_id = m1.product_id
        JOIN region_mapping rm ON w.region = rm.region
        LEFT JOIN MP m2 ON rm.fallback_region = m2.region AND w.product_id = m2.product_id
    )
    where final_min_price is not null 
    group by all 
),

ben_soliman as (
    select z.* from (
        select maxab_product_id as product_id, maxab_sku as sku, avg(bs_final_price) as ben_soliman_price
        from (
            select *, row_number() over(partition by maxab_product_id order by diff) as rnk_2 from (
                select *, (bs_final_price-wac_p)/wac_p as diff_2 from (
                    select *, bs_price/maxab_basic_unit_count as bs_final_price from (
                        select *, row_number() over(partition by maxab_product_id, maxab_pu order by diff) as rnk from (
                            select sm.*, max(INJECTION_DATE::date) over(partition by maxab_product_id, maxab_pu) as max_date,
                                wac1, wac_p, abs(bs_price-(wac_p*maxab_basic_unit_count))/(wac_p*maxab_basic_unit_count) as diff 
                            from materialized_views.savvy_mapping sm 
                            join finance.all_cogs f on f.product_id = sm.maxab_product_id and current_timestamp between f.from_Date and f.to_date
                            where bs_price is not null and INJECTION_DATE::date >= CURRENT_DATE - 5
                            qualify INJECTION_DATE::date = max_date
                        ) qualify rnk = 1 
                    )
                ) where diff_2 between -0.5 and 0.5 
            ) qualify rnk_2 = 1 
        ) group by all
    ) z 
    join finance.all_cogs f on f.product_id = z.product_id and current_timestamp between f.from_Date and f.to_date
    where ben_soliman_price between f.wac_p*0.7 and f.wac_p*1.3
),

scrapped_data as (
    select product_id, cat, brand, region, max_date,
        min(MARKET_PRICE) as min_scrapped, max(MARKET_PRICE) as max_scrapped, median(MARKET_PRICE) as median_scrapped
    from (
        select MATERIALIZED_VIEWS.CLEANED_MARKET_PRICES.*, max(date) over(partition by region, MATERIALIZED_VIEWS.CLEANED_MARKET_PRICES.product_id, competitor) as max_date
        from MATERIALIZED_VIEWS.CLEANED_MARKET_PRICES
        join finance.all_cogs f on f.product_id = MATERIALIZED_VIEWS.CLEANED_MARKET_PRICES.product_id and CURRENT_TIMESTAMP between f.from_date and f.to_date 
        where date >= current_date - 5 and MARKET_PRICE between f.wac_p * 0.7 and wac_p*1.3
        qualify date = max_date 
    ) group by all 
),

local_prices as (
    SELECT case when cpu.cohort_id in (700) then 'Cairo'
                when cpu.cohort_id in (701) then 'Giza'
                when cpu.cohort_id in (704) then 'Delta East'
                when cpu.cohort_id in (703) then 'Delta West'
                when cpu.cohort_id in (1123,1124,1125,1126) then 'Upper Egypt'
                when cpu.cohort_id in (702) then 'Alexandria'
           end as region,
           cohort_id, pu.product_id, pu.packing_unit_id, pu.basic_unit_count, avg(cpu.price) as price
    FROM cohort_product_packing_units cpu
    join PACKING_UNIT_PRODUCTS pu on pu.id = cpu.product_packing_unit_id
    WHERE cpu.cohort_id in (700,701,702,703,704,1123,1124,1125,1126)
        and cpu.created_at::date <> '2023-07-31' and cpu.is_customized = true
    group by all 
),

live_prices as (
    select region, cohort_id, product_id, pu_id as packing_unit_id, buc as basic_unit_count, NEW_PRICE as price
    from materialized_views.DBDP_PRICES
    where created_at = Current_timestamp::date
        and DATE_PART('hour', Current_timestamp::time) BETWEEN SPLIT_PART(time_slot, '-', 1)::int AND SPLIT_PART(time_slot, '-', 2)::int
        and cohort_id in (700,701,702,703,704,696,695,698,697,699,1123,1124,1125,1126)
),

prices as (
    select * from (
        SELECT *, 1 AS priority FROM live_prices
        UNION ALL
        SELECT *, 2 AS priority FROM local_prices
    )
    QUALIFY ROW_NUMBER() OVER (PARTITION BY region, cohort_id, product_id, packing_unit_id ORDER BY priority) = 1
),

maxab_prices as (
    select region, cohort_id, product_id, price from prices where basic_unit_count = 1 
),

sales as (
    SELECT DISTINCT cpc.cohort_id, pso.product_id, sum(pso.total_price) as nmv
    FROM product_sales_order pso
    JOIN sales_orders so ON so.id = pso.sales_order_id   
    join COHORT_PRICING_CHANGES cpc on cpc.id = pso.cohort_pricing_change_id
    WHERE so.created_at::date between date_trunc('month', Current_timestamp::date - 120) and Current_timestamp::date - 1
        AND so.sales_order_status_id not in (7,12)
        AND so.channel IN ('telesales','retailer')
        AND pso.purchased_item_count <> 0
    GROUP BY ALL
),

margin_change as (
    select product_id, cohort_id, (0.6*product_std) + (0.3*brand_std) + (0.1*cat_std) as std, avg_margin
    from (
        select product_id, cohort_id, stddev(product_margin) as product_std, stddev(brand_margin) as brand_std,
            stddev(cat_margin) as cat_std, avg(product_margin) as avg_margin
        from (
            select distinct product_id, order_date, cohort_id,
                (nmv-cogs_p)/nmv as product_margin, (brand_nmv-brand_cogs)/brand_nmv as brand_margin,
                (cat_nmv-cat_cogs)/cat_nmv as cat_margin
            from (
                SELECT DISTINCT so.created_at::date as order_date, cpc.cohort_id, pso.product_id,
                    brands.name_ar as brand, categories.name_ar as cat,
                    sum(COALESCE(f.wac_p,0) * pso.purchased_item_count * pso.basic_unit_count) as cogs_p,
                    sum(pso.total_price) as nmv,
                    sum(nmv) over(partition by order_date, cat, brand) as brand_nmv,
                    sum(cogs_p) over(partition by order_date, cat, brand) as brand_cogs,
                    sum(nmv) over(partition by order_date, cat) as cat_nmv,
                    sum(cogs_p) over(partition by order_date, cat) as cat_cogs
                FROM product_sales_order pso
                JOIN sales_orders so ON so.id = pso.sales_order_id   
                join COHORT_PRICING_CHANGES cpc on cpc.id = pso.cohort_pricing_change_id
                JOIN products on products.id=pso.product_id
                JOIN brands on products.brand_id = brands.id 
                JOIN categories ON products.category_id = categories.id
                JOIN finance.all_cogs f ON f.product_id = pso.product_id
                    AND f.from_date::date <= so.created_at::date AND f.to_date::date > so.created_at::date
                WHERE so.created_at::date between date_trunc('month', Current_timestamp::date - 120) and Current_timestamp::date
                    AND so.sales_order_status_id not in (7,12)
                    AND so.channel IN ('telesales','retailer')
                    AND pso.purchased_item_count <> 0
                GROUP BY ALL
            )
        ) group by all 
    )
),

cat_brand_target as (
    SELECT DISTINCT cat, brand, margin as target_bm
    FROM performance.commercial_targets cplan
    QUALIFY CASE WHEN DATE_TRUNC('month', MAX(DATE) OVER()) = DATE_TRUNC('month', Current_timestamp::date) 
        THEN DATE_TRUNC('month', Current_timestamp::date)
        ELSE DATE_TRUNC('month', Current_timestamp::date - INTERVAL '1 month') END = DATE_TRUNC('month', date)
),

cat_target as (
    select cat, sum(target_bm * (target_nmv/cat_total)) as cat_target_margin
    from (
        select *, sum(target_nmv) over(partition by cat) as cat_total
        from (
            select cat, brand, avg(target_bm) as target_bm, sum(target_nmv) as target_nmv
            from (
                SELECT DISTINCT date, city as region, cat, brand, margin as target_bm, nmv as target_nmv
                FROM performance.commercial_targets cplan
                QUALIFY CASE WHEN DATE_TRUNC('month', MAX(DATE) OVER()) = DATE_TRUNC('month', Current_timestamp::date) 
                    THEN DATE_TRUNC('month', Current_timestamp::date)
                    ELSE DATE_TRUNC('month', Current_timestamp::date - INTERVAL '1 month') END = DATE_TRUNC('month', date)
            ) group by all
        )
    ) group by all 
)

select distinct maxab.cohort_id, maxab.product_id,
    CONCAT(products.name_ar,' ',products.size,' ',product_units.name_ar) as sku,
    brands.name_ar as brand, categories.name_ar as cat, sections.name_ar as section_name,
    maxab.price as maxab_price, bs.ben_soliman_price,
    final_min_price, final_max_price, final_mod_price,
    min_scrapped, median_scrapped, max_scrapped,
    wac_p, coalesce(nmv,0) as nmv, coalesce(mc.std,0.01) as std,
    coalesce(coalesce(cbt.target_bm, ct.cat_target_margin),0) as target_margin,
    coalesce(avg_margin,0) as avg_margin
from maxab_prices maxab
left join ben_soliman bs on bs.product_id = maxab.product_id
left join final_mp fmp on fmp.product_id = maxab.product_id and fmp.region = maxab.region
left join sales s on s.product_id = maxab.product_id and s.cohort_id = maxab.cohort_id
left join scrapped_data sd on sd.product_id = maxab.product_id and sd.region = maxab.region
join finance.all_cogs f on f.product_id = maxab.product_id and CURRENT_TIMESTAMP between f.from_date and f.to_date
JOIN products on products.id=maxab.product_id
JOIN brands on products.brand_id = brands.id 
JOIN categories ON products.category_id = categories.id
JOIN sections ON sections.id = categories.section_id
JOIN product_units ON product_units.id = products.unit_id 
left join margin_change mc on mc.product_id = maxab.product_id and mc.cohort_id = maxab.cohort_id
left join cat_brand_target cbt on cbt.brand = brands.name_ar and cbt.cat = categories.name_ar 
left join cat_target ct on ct.cat = categories.name_ar 
'''

# Column labels, in the same order as the final SELECT list of the query above.
market_cols = ['cohort_id', 'product_id', 'sku', 'brand', 'cat', 'section_name', 'maxab_price',
               'ben_soliman_price', 'final_min_price', 'final_max_price', 'final_mod_price',
               'min_scrapped', 'median_scrapped', 'max_scrapped', 'wac_p', 'nmv', 'std', 'target_margin', 'avg_margin']

# query_snowflake returns a column-less empty frame on failure, in which case the
# market_cols selection on the next line raises KeyError.
market_main_data = to_numeric_columns(query_snowflake(MARKET_DATA_QUERY, columns=market_cols))
# Keep the first row for each (cohort_id, product_id) pair.
market_main_data = market_main_data[market_cols].drop_duplicates(subset=['cohort_id', 'product_id'])
print(f"Loaded {len(market_main_data)} market data records")    

In [None]:
# =============================================================================
# DATA LOADING - Additional Queries (Groups, Price Ups, Sales, WAC, Stocks, Stats)
# =============================================================================

# Product commercial groups from PostgreSQL
# Loaded through setup_environment_2.dwh_pg_query rather than query_snowflake;
# column labels are lower-cased before numeric conversion.
groups = setup_environment_2.dwh_pg_query(
    "SELECT * FROM materialized_views.sku_commercial_groups", 
    columns=['product_id', 'group']
)
groups.columns = groups.columns.str.lower()
groups = to_numeric_columns(groups)

# Price ups data
# NOTE(review): the SQL column is forecasted_date but the DataFrame label is
# spelled 'forcasted_date'; code outside this cell may depend on that spelling.
price_ups = to_numeric_columns(query_snowflake('''
    SELECT region, product_id, new_pp, forecasted_date
    FROM materialized_views.DBDP_PRICE_UPS
''', columns=['region', 'product_id', 'new_pp', 'forcasted_date']))

# Sales data (120-day history)
# NMV per cohort/product for the 120 days up to and including yesterday,
# excluding order statuses 7 and 12 and limited to the nine pricing cohorts.
sales = to_numeric_columns(query_snowflake('''
    SELECT DISTINCT cpc.cohort_id, pso.product_id,
        CONCAT(products.name_ar,' ',products.size,' ',product_units.name_ar) as sku,
        brands.name_ar as brand, categories.name_ar as cat,
        sum(pso.total_price) as nmv
    FROM product_sales_order pso
    JOIN sales_orders so ON so.id = pso.sales_order_id
    JOIN COHORT_PRICING_CHANGES cpc ON cpc.id = pso.COHORT_PRICING_CHANGE_id
    JOIN products ON products.id = pso.product_id
    JOIN brands ON products.brand_id = brands.id 
    JOIN categories ON products.category_id = categories.id
    JOIN product_units ON product_units.id = products.unit_id 
    WHERE so.created_at::date BETWEEN current_date - 120 AND current_date - 1 
        AND so.sales_order_status_id NOT IN (7, 12)
        AND so.channel IN ('telesales', 'retailer')
        AND pso.purchased_item_count <> 0
        AND cpc.cohort_id IN (700,701,702,703,704,1123,1124,1125,1126)
    GROUP BY ALL
''', columns=['cohort_id', 'product_id', 'sku', 'brand', 'cat', 'nmv']))

# WAC (Weighted Average Cost) data
# zone_to_use (the Snowflake session timezone read in an earlier cell) is
# interpolated into the SQL so "now" is converted to Africa/Cairo before being
# compared with the cost validity window.
wacs = to_numeric_columns(query_snowflake(f'''
    SELECT product_id, wac_p
    FROM finance.all_cogs f 
    WHERE CONVERT_TIMEZONE('{zone_to_use}', 'Africa/Cairo', CURRENT_TIMESTAMP()) BETWEEN f.from_date AND f.to_date 
''', columns=['product_id', 'wac_p']))

# Current stocks
# The SQL alias is "stocks" but the DataFrame column is labelled 'cu_stocks'.
stocks = to_numeric_columns(query_snowflake('''
    SELECT DISTINCT product_warehouse.warehouse_id, product_warehouse.product_id,
        (product_warehouse.available_stock)::integer as stocks
    FROM product_warehouse 
    JOIN products ON product_warehouse.product_id = products.id
    JOIN product_units ON products.unit_id = product_units.id
    WHERE product_warehouse.warehouse_id NOT IN (6, 9, 10)
        AND product_warehouse.activation = 'true'
        AND product_warehouse.is_basic_unit = 1
''', columns=['warehouse_id', 'product_id', 'cu_stocks']))

# Product statistics
# Most recent PRODUCT_STATISTICS row per (product_id, region), looking back to
# the start of the month that contains the date 60 days ago.
stats = to_numeric_columns(query_snowflake('''
    SELECT region, product_id, optimal_bm, MIN_BOUNDARY, MAX_BOUNDARY, MEDIAN_BM
    FROM (
        SELECT region, product_id, target_bm, optimal_bm, MIN_BOUNDARY, MAX_BOUNDARY, MEDIAN_BM,
            MAX(created_at) OVER(PARTITION BY product_id, region) as max_date, created_at
        FROM materialized_views.PRODUCT_STATISTICS
        WHERE created_at::date >= date_trunc('month', current_date - 60)
        QUALIFY max_date = created_at
    )
''', columns=['region', 'product_id', 'optimal_bm', 'min_boundary', 'max_boundary', 'median_bm']))

print(f"Loaded: {len(groups)} groups, {len(price_ups)} price_ups, {len(sales)} sales, {len(wacs)} wacs, {len(stocks)} stocks, {len(stats)} stats")

In [None]:
# =============================================================================
# DATA LOADING - TGTG Aging Monitor (Google Sheets)
# =============================================================================

# ISO week numbers for this week and the two weeks before it. The earlier weeks
# are derived by date arithmetic so the sequence wraps correctly at the start of
# a year (e.g. 1, 52, 51 instead of 1, 0, -1).
today = datetime.now()
week_candidates = [
    str((today - timedelta(weeks=weeks_back)).isocalendar()[1])
    for weeks_back in range(3)
]
week_number = int(week_candidates[0])


def _title_numbers(title):
    """Return the set of whole numbers that appear in a worksheet title."""
    digits_only = ''.join(ch if ch in '0123456789' else ' ' for ch in title)
    return {int(token) for token in digits_only.split()}


# Open the workbook once and reuse it for both listing and loading.
tgtg_workbook = client.open('Egypt SKUs Aging Monitor')
tgtg_worksheets = tgtg_workbook.worksheets()
worksheet_names = [ws.title for ws in tgtg_worksheets]

# Pick the first sheet whose title contains the most recent candidate week as a
# whole number. Plain substring matching would let week "5" match "15" or "2025".
sheet_name = next(
    (
        name
        for week_str in week_candidates
        for name in worksheet_names
        if int(week_str) in _title_numbers(name)
    ),
    None,
)
if sheet_name is None:
    raise gspread.exceptions.WorksheetNotFound(
        f"No 'Egypt SKUs Aging Monitor' sheet found for weeks {week_candidates}; "
        f"available sheets: {worksheet_names}"
    )

# Load TGTG data
tgtg_sheet = tgtg_workbook.worksheet(sheet_name)
tgtg_data = tgtg_sheet.get_all_values()

# Row index 1 holds the column headers and data starts at row index 2, so at
# least two rows are required to build the frame.
if len(tgtg_data) >= 2:
    tgtg_df = pd.DataFrame(tgtg_data[2:], columns=tgtg_data[1]).iloc[:, :21]
    tgtg_df = to_numeric_columns(tgtg_df)
    tgtg_df = tgtg_df[tgtg_df['Fulfillment confirmation'] == 'confirmed']

    # Select relevant warehouse columns
    # NOTE(review): 'El-Marg' and 'Sakkarah' from WAREHOUSE_CONFIG are not listed
    # here — confirm whether the sheet has no columns for them.
    warehouse_cols = ['SKU', 'Sharqya', 'Khorshed Alex', 'Bani sweif', 'Mostorod', 'Barageel',
                      'El-Mahala', 'Sohag', 'Mansoura FC', 'Assiut FC', 'Menya Samalot', 'Tanta']
    tgtg_df = tgtg_df[warehouse_cols]

    # Melt to long format (SKU x warehouse -> stocks)
    tgtg_long = tgtg_df.melt(id_vars=['SKU'], var_name='warehouse', value_name='stocks')
    # Row-wise conversion via itertuples: unlike DataFrame.apply(axis=1), this
    # also works when no confirmed rows are left (apply would return a frame and
    # the column assignment would fail).
    tgtg_long['product_id'] = [convert_sku_id(row) for row in tgtg_long.itertuples(index=False)]
    tgtg_long = tgtg_long.drop(columns='SKU')
    tgtg_long = tgtg_long[~tgtg_long['stocks'].isna()]
else:
    tgtg_long = pd.DataFrame(columns=['warehouse', 'stocks', 'product_id'])

print(f"Loaded TGTG data from sheet '{sheet_name}': {len(tgtg_long)} warehouse-product records")

In [None]:
# =============================================================================
# DATA PROCESSING - Sales Tier Assignment
# =============================================================================

# Share of each product in its cohort's NMV, then the running share when the
# cohort's products are ordered from highest to lowest NMV
sales['total_nmv'] = sales.groupby('cohort_id')['nmv'].transform('sum')
sales['cntrb_nmv'] = sales['nmv'] / sales['total_nmv']
sales = sales.sort_values(['cohort_id', 'nmv'], ascending=[True, False])
sales['nmv_cumulative_cntrb'] = sales.groupby('cohort_id')['cntrb_nmv'].cumsum()

# Base tier comes from the cumulative contribution
sales['tier'] = sales['nmv_cumulative_cntrb'].apply(assign_tier)

# Category rule: one tier lower, but never below tier 1
in_min_price_category = sales['cat'].isin(MIN_PRICE_CATEGORIES)
sales.loc[in_min_price_category, 'tier'] = (sales['tier'] - 1).clip(lower=1)
#sales.loc[sales['brand'].isin(blue_list), 'tier'] = np.maximum(sales['tier'] - 1, 1)

# Brand rules force a fixed tier; later entries win if a brand is listed twice
brand_tier_overrides = (
    (MIN_PRICE_BRANDS, 1),
    (BELOW_MARKET_BRANDS, 0),
    (AVG_PRICE_BRANDS, 3),
    (MAX_PRICE_BRANDS, 5),
)
for brand_names, forced_tier in brand_tier_overrides:
    sales.loc[sales['brand'].isin(brand_names), 'tier'] = forced_tier

# Status rule: any STATUS containing "min" lowers every tier by 1 (floor 0)
if 'min' in STATUS:
    sales['tier'] = (sales['tier'] - 1).clip(lower=0)

print(f"Tier distribution:\n{sales['tier'].value_counts().sort_index()}")

In [None]:
# =============================================================================
# DATA PROCESSING - Market Data with Groups
# =============================================================================

# Price columns that can be back-filled from the product's group
price_cols = ['ben_soliman_price', 'final_min_price', 'final_max_price', 'final_mod_price',
              'min_scrapped', 'median_scrapped', 'max_scrapped']
group_price_cols = [f'{col}_group' for col in price_cols]

# Work on a copy of the market data with each product's group attached
market_data = market_main_data.copy().merge(groups, on='product_id', how='left')

# Rows that belong to a group, with each row's share of its group/cohort NMV
# (share defaults to 1 where the division yields NaN)
groups_data = market_data[market_data['group'].notna()].copy()
groups_data['group_nmv'] = groups_data.groupby(['group', 'cohort_id'])['nmv'].transform('sum')
groups_data['cntrb'] = groups_data['nmv'].div(groups_data['group_nmv']).fillna(1)

# Median of every price column per (group, cohort)
groups_agg = (
    groups_data
    .groupby(['group', 'cohort_id'])
    .agg(dict.fromkeys(price_cols, 'median'))
    .reset_index()
)

# Bring the group medians alongside the product rows and use them only where
# the product's own price is missing
merged = market_data.merge(groups_agg, how='left', on=['group', 'cohort_id'], suffixes=('', '_group'))
for col, group_col in zip(price_cols, group_price_cols):
    merged[col] = merged[col].fillna(merged[group_col])

# The helper "_group" columns are no longer needed
market_data = merged.drop(columns=group_price_cols)

print(f"Market data after group processing: {len(market_data)} records")

In [None]:
# =============================================================================
# DATA PROCESSING - Price Analysis & Margin Calculation
# =============================================================================

# Row-wise price analysis; the five returned values become the price
# distribution columns
price_stat_cols = ['minimum', 'percentile_25', 'percentile_50', 'percentile_75', 'maximum']
market_data[price_stat_cols] = market_data.apply(price_analysis, axis=1, result_type='expand')

# Keep only rows for which the analysis produced a minimum
market_data = market_data.loc[market_data['minimum'].notna()]

# Row-wise below/above market price bounds
step_bounds = market_data.apply(calculate_step_bounds, axis=1, result_type='expand')
market_data[['below_market', 'above_market']] = step_bounds

# Narrow to the identifiers and the price columns the margins are built from
market_data = market_data[['cohort_id', 'product_id', 'maxab_price', 'wac_p'] + price_stat_cols
                          + ['below_market', 'above_market']]

# Every margin is (price - wac_p) / price; the mapping is
# output margin column -> price column it is derived from.
# 'below_market' and 'above_market' are overwritten in place (price -> margin).
margin_sources = {
    'below_market': 'below_market',
    'market_min': 'minimum',
    'market_25': 'percentile_25',
    'market_50': 'percentile_50',
    'market_75': 'percentile_75',
    'market_max': 'maximum',
    'above_market': 'above_market',
    'current_margin': 'maxab_price',
}
for margin_col, price_col in margin_sources.items():
    source_price = market_data[price_col]
    market_data[margin_col] = (source_price - market_data['wac_p']) / source_price

# Only identifiers and margins are carried forward
market_data = market_data[['cohort_id', 'product_id', 'current_margin', 'below_market', 'market_min',
                           'market_25', 'market_50', 'market_75', 'market_max', 'above_market']]

print(f"Market data after price analysis: {len(market_data)} records")

In [None]:
# =============================================================================
# DATA PROCESSING - Calculate Min/Max Margins (Found Products)
# =============================================================================

# Products that already have min/max constraints, joined (inner) with their
# market margins and their sales tier
key_cols = ['cohort_id', 'product_id']
found = (
    min_max_df
    .merge(market_data, on=key_cols)
    .merge(sales[key_cols + ['tier']], on=key_cols)
)

# Tier i takes the i-th entry of the margin ladder as its minimum and the next
# entry as its maximum; the top tier's maximum is market_max scaled by 1.2
margin_ladder = ['below_market', 'market_min', 'market_25', 'market_50', 'market_75', 'market_max']
tier_conditions = [found['tier'].eq(level) for level in range(6)]
tier_min_choices = [found[name] for name in margin_ladder]
tier_max_choices = [found[name] for name in margin_ladder[1:]] + [found['market_max'] * 1.2]

# Tiers outside 0-5 fall back to market_min for both bounds
found['selected_min'] = np.select(tier_conditions, tier_min_choices, default=found['market_min'])
found['selected_max'] = np.select(tier_conditions, tier_max_choices, default=found['market_min'])

# Relative distance of the proposed minimum from the current margin and from
# the existing minimum; a row is kept when either distance is within +/-0.55
found['min_cu_diff'] = (found['selected_min'] - found['current_margin']) / found['current_margin']
found['min_min_diff'] = (found['selected_min'] - found['min_margin']) / found['min_margin']
near_current = found['min_cu_diff'].between(-0.55, 0.55)
near_existing_min = found['min_min_diff'].between(-0.55, 0.55)
found = found[near_current | near_existing_min]

# New maximum: the larger of (existing relative spread applied to the new
# minimum) and the tier maximum, kept between new_min + 0.01 and new_min + 0.04
found['diff'] = (found['max_margin'] - found['min_margin']) / found['min_margin']
found['new_min'] = found['selected_min']
spread_based_max = (found['diff'] + 1) * found['selected_min']
lowest_allowed_max = found['selected_min'] + 0.01
highest_allowed_max = found['selected_min'] + 0.04
found['new_max'] = np.minimum(
    np.maximum(np.maximum(spread_based_max, found['selected_max']), lowest_allowed_max),
    highest_allowed_max
)

found = found[['cohort_id', 'product_id', 'new_min', 'new_max']].assign(type='both')

print(f"Found (products with existing min_max): {len(found)} records")
    

In [None]:
# =============================================================================
# DATA PROCESSING - Calculate Min/Max Margins (New Products - MP Only)
# =============================================================================

# Identify products not in existing min_max with an indicator-based anti-join.
# min_max_df is only read here: no helper column is written onto it, so the
# shared frame is left untouched for the other cells that use it.
existing_keys = min_max_df[['cohort_id', 'product_id']].drop_duplicates()
not_found = market_data.merge(existing_keys, on=['cohort_id', 'product_id'], how='left', indicator=True)
not_found = not_found[not_found['_merge'] == 'left_only'].drop(columns='_merge')

# Attach the sales tier (inner join: products without a tier are dropped)
not_found = not_found.merge(sales[['cohort_id', 'product_id', 'tier']], on=['cohort_id', 'product_id'])

# Select margins based on tier: tier i uses the i-th minimum choice and the
# i-th maximum choice; the top tier's maximum is market_max scaled by 1.2
tier_conditions = [not_found['tier'] == i for i in range(6)]
tier_min_choices = [not_found['below_market'], not_found['market_min'], not_found['market_25'],
                    not_found['market_50'], not_found['market_75'], not_found['market_max']]
tier_max_choices = [not_found['market_min'], not_found['market_25'], not_found['market_50'],
                    not_found['market_75'], not_found['market_max'], not_found['market_max'] * 1.2]

# Tiers outside 0-5 fall back to market_min for both bounds
not_found['selected_min'] = np.select(tier_conditions, tier_min_choices, default=not_found['market_min'])
not_found['selected_max'] = np.select(tier_conditions, tier_max_choices, default=not_found['market_min'])

# Keep rows whose proposed minimum is within +/-2 (relative) of the current margin.
# .copy() makes the filtered result an independent frame before new columns
# are assigned to it.
not_found['min_cu_diff'] = (not_found['selected_min'] - not_found['current_margin']) / not_found['current_margin']
not_found = not_found[not_found['min_cu_diff'].between(-2, 2)].copy()

# Calculate new min/max: the tier maximum, kept between new_min + 0.01 and
# new_min + 0.04
not_found['new_min'] = not_found['selected_min']
not_found['new_max'] = np.minimum(
    np.maximum(not_found['selected_max'], not_found['selected_min'] + 0.01),
    not_found['selected_min'] + 0.04
)
not_found = not_found[['cohort_id', 'product_id', 'new_min', 'new_max']].assign(type='MP_only')

print(f"Not found (new products): {len(not_found)} records")

In [None]:
# =============================================================================
# DATA PROCESSING - Combine Results & Add Region Mapping
# =============================================================================

# Stack both result sets, keep only cohorts present in REGION_COHORT_MAP
# (inner join), then reduce to the output columns under their final names
final_df = (
    pd.concat([found, not_found])
    .drop_duplicates()
    .merge(REGION_COHORT_MAP, on='cohort_id')
    [['cohort_id', 'product_id', 'new_min', 'new_max', 'type']]
    .drop_duplicates()
    .rename(columns={'new_min': 'min_margin', 'new_max': 'max_margin'})
)

print(f"Combined dataframe: {len(final_df)} records")

In [None]:
# =============================================================================
# DATA PROCESSING - TGTG (Too Good To Go) Products
# =============================================================================

# Attach WAC and warehouse configuration, drop rows flagged in TGTG_EXCLUSIONS
# (any row with a non-null 'remove'), then keep only rows with positive stock
tgtg = (
    tgtg_long
    .merge(wacs, on='product_id')
    .merge(WAREHOUSE_CONFIG, on='warehouse')
    .merge(TGTG_EXCLUSIONS, on=['product_id', 'warehouse_id'], how='left')
    .loc[lambda frame: frame['remove'].isna()]
    .merge(stocks, on=['warehouse_id', 'product_id'])
)
tgtg = tgtg[tgtg['cu_stocks'] > 0]

# Stock value = units on hand x WAC; rank by it and keep values above 100
tgtg['stock_value'] = tgtg['cu_stocks'] * tgtg['wac_p']
tgtg = tgtg.sort_values(by='stock_value', ascending=False)
tgtg = tgtg[tgtg['stock_value'] > 100]

# Market margins and target margin are optional (left joins); stats are
# required (inner join)
tgtg = tgtg.merge(market_data, on=['cohort_id', 'product_id'], how='left')
tgtg = tgtg.merge(stats, on=['region', 'product_id'])
target_margins = market_main_data[['cohort_id', 'product_id', 'target_margin']]
tgtg = tgtg.merge(target_margins, on=['cohort_id', 'product_id'], how='left')

# Every remaining NaN in the frame becomes 1000.
# NOTE(review): presumably a sentinel so missing inputs lose the minimum
# below - confirm; it also ends up in the saved reference file.
tgtg = tgtg.fillna(1000)

# Aggressive min margin: the smallest of the four scaled candidates;
# the max margin is set equal to it
margin_candidates = [
    tgtg['market_min'] * 0.8,
    tgtg['target_margin'] / 4,
    tgtg['min_boundary'] * 0.9,
    tgtg['optimal_bm'] * 0.75,
]
tgtg['min_margin'] = np.minimum.reduce(margin_candidates)
tgtg['max_margin'] = tgtg['min_margin']

# Save TGTG data for reference
tgtg.to_excel("Min_max_data/tgtg.xlsx", index=False)

# One row per cohort/product, taking the lowest margins across warehouses
tgtg = (
    tgtg[['cohort_id', 'product_id', 'min_margin', 'max_margin']]
    .groupby(['cohort_id', 'product_id'], as_index=False)
    .agg({'min_margin': 'min', 'max_margin': 'min'})
)
tgtg['type'] = 'TGTG'

print(f"TGTG products: {len(tgtg)} records")

In [None]:
# =============================================================================
# DATA PROCESSING - Merge TGTG with Final DataFrame
# =============================================================================

# Anti-join: keep only the final_df rows whose (product, cohort) pair has no
# TGTG entry, so the TGTG rows appended below do not create duplicates
tgtg_keys = tgtg[['product_id', 'cohort_id']]
result = (
    final_df
    .merge(tgtg_keys, how='left', on=['product_id', 'cohort_id'], indicator=True)
    .loc[lambda frame: frame['_merge'] == 'left_only']
    .drop(columns=['_merge'])
)

# Append the TGTG rows
final_df = pd.concat([result, tgtg])

print(f"Final dataframe after TGTG: {len(final_df)} records")

In [None]:
# =============================================================================
# DATA PROCESSING - Price Ups & Final Adjustments
# =============================================================================

# Give every price-up row its cohort via the region mapping, then attach the
# price-ups to the results (products without a price-up get NaN in 'new_pp')
price_ups = price_ups.merge(REGION_COHORT_MAP, on='region')
final_df = final_df.merge(price_ups, how='left', on=['product_id', 'cohort_id'])

# Non-TGTG rows with a price-up get a wider ceiling:
# min(max_margin + 0.15, min_margin + 0.2)
has_price_up = final_df['new_pp'].notna()
mask = has_price_up & final_df['type'].ne('TGTG')
widened_max = np.minimum(final_df['max_margin'] + 0.15, final_df['min_margin'] + 0.2)
final_df.loc[mask, 'max_margin'] = widened_max[mask]

# enforce = 1 for every row with a price-up (TGTG included), NaN otherwise
final_df['enforce'] = np.where(has_price_up, 1, np.nan)

# De-duplicate, pull SKU/brand from sales and keep the output columns
final_df = final_df.drop_duplicates().merge(sales, on=['cohort_id', 'product_id'], how='left')
final_df = final_df[['cohort_id', 'product_id', 'sku', 'min_margin', 'max_margin', 'enforce', 'brand', 'type']]

# Put the existing bounds next to the new ones for comparison
previous_bounds = min_max_df[['cohort_id', 'product_id', 'min_margin', 'max_margin']].rename(
    columns={'min_margin': 'old_min', 'max_margin': 'old_max'}
)
final_df = final_df.merge(previous_bounds, how='left', on=['cohort_id', 'product_id'])

print(f"Final dataframe ready: {len(final_df)} records")

In [None]:
# =============================================================================
# OUTPUT - Export Final Results
# =============================================================================

# Write the final table to Excel (without the index column)
output_path = 'Min_max_data/min_max_data.xlsx'
final_df.to_excel(output_path, index=False)

# Console summary: banner, total row count, rows per type, output location
banner = '=' * 60
print("\n" + banner)
print("MIN/MAX MARGIN CALCULATION COMPLETE")
print(banner)
print(f"Total records: {len(final_df)}")
print("\nBreakdown by type:")
print(final_df['type'].value_counts())
print(f"\nOutput saved to: {output_path}")