# Market Data Module

This module provides fresh market prices and margin tiers data on demand.

## Functions
- `get_market_prices(df_base)` - Fetch and process market prices from all sources
- `calculate_margin_tiers(df_base)` - Calculate margin tiers for products
- `get_market_and_margin_data(df_base)` - Combined function that runs both and merges the results into the input

## Output Columns

**Market Prices (expressed as margins relative to `wac_p`, not as prices):**
- `below_market`, `market_min`, `market_25`, `market_50`, `market_75`, `market_max`, `above_market`

**Margin Tiers:**
- `margin_tier_below`, `margin_tier_1`, `margin_tier_2`, `margin_tier_3`, `margin_tier_4`, `margin_tier_5`, `margin_tier_above_1`, `margin_tier_above_2`

## Usage
```python
%run market_data_module.ipynb
df_market = get_market_and_margin_data(df_input)
```


In [1]:
# =============================================================================
# IMPORTS & CONFIGURATION
# =============================================================================
import pandas as pd
import numpy as np
from datetime import datetime
import pytz
import snowflake.connector
import os

# Import setup_environment_2 for credentials
import sys
sys.path.append('..')
import setup_environment_2

# Initialize environment (loads Snowflake credentials)
setup_environment_2.initialize_env()

# Cairo timezone
CAIRO_TZ = pytz.timezone('Africa/Cairo')
# Evaluated once, when this cell/module is executed: CAIRO_NOW is the load
# timestamp, not a live clock. Call datetime.now(CAIRO_TZ) for the current time.
CAIRO_NOW = datetime.now(CAIRO_TZ)

# =============================================================================
# SNOWFLAKE CONNECTION
# =============================================================================
def query_snowflake(query):
    """Run ``query`` on Snowflake and hand back the result set as a DataFrame.

    A fresh connection is opened from the ``SNOWFLAKE_*`` environment
    variables on every call and is closed again before returning, whether or
    not the query succeeds. Result column names are lower-cased.

    Args:
        query: SQL text to execute.

    Returns:
        pd.DataFrame holding every fetched row.
    """
    connection = snowflake.connector.connect(
        user=os.environ["SNOWFLAKE_USERNAME"],
        account=os.environ["SNOWFLAKE_ACCOUNT"],
        password=os.environ["SNOWFLAKE_PASSWORD"],
        database=os.environ["SNOWFLAKE_DATABASE"]
    )
    try:
        cursor = connection.cursor()
        # Select the warehouse first, then run the caller's statement.
        for statement in ("USE WAREHOUSE COMPUTE_WH", query):
            cursor.execute(statement)
        rows = cursor.fetchall()
        header = []
        for column_meta in cursor.description:
            header.append(column_meta[0].lower())
        frame = pd.DataFrame(rows, columns=header)
    finally:
        connection.close()
    return frame

def get_snowflake_timezone():
    """Look up the Snowflake ``TIMEZONE`` parameter.

    Returns:
        The ``value`` field of the first row returned by
        ``SHOW PARAMETERS LIKE 'TIMEZONE'``, or ``"UTC"`` when no row comes back.
    """
    params = query_snowflake("SHOW PARAMETERS LIKE 'TIMEZONE'")
    if len(params) > 0:
        return params.value[0]
    return "UTC"

# Get timezone for queries
# NOTE: this runs a Snowflake query as a side effect of loading the module.
# The value is interpolated into the SQL f-strings defined in this module as the
# source timezone of CONVERT_TIMEZONE(..., 'Africa/Cairo', CURRENT_TIMESTAMP()).
TIMEZONE = get_snowflake_timezone()

# Load banner; CAIRO_NOW is the timestamp captured when the module was loaded.
print(f"Market Data Module loaded at {CAIRO_NOW.strftime('%Y-%m-%d %H:%M:%S')} Cairo time")
print(f"Snowflake timezone: {TIMEZONE}")


  warn_incompatible_dep(


/home/ec2-user/.Renviron
/home/ec2-user/service_account_key.json
Market Data Module loaded at 2026-01-24 01:55:57 Cairo time
Snowflake timezone: America/Los_Angeles


In [2]:
# =============================================================================
# MARKET PRICE QUERIES
# =============================================================================
# Note: TIMEZONE is set dynamically from Snowflake in the imports cell above
# =============================================================================
# 1. BEN SOLIMAN PRICES QUERY
# =============================================================================
# Built once, at load time, as an f-string: TIMEZONE is baked into the SQL text
# as the source timezone for converting CURRENT_TIMESTAMP() to Africa/Cairo.
#
# Reads materialized_views.savvy_mapping joined to the finance.all_cogs row whose
# from_date/to_date window contains the current Cairo time, restricted to rows
# with a non-null bs_price injected within the last 5 days.
#   * CTE `lower`: rows whose per-unit price deviates from wac_p * basic unit
#     count by more than 30% (diff > 0.3) and where p1 > 1; the price is
#     rescaled by the `new_d` multiplier. Latest INJECTION_DATE per product only.
#   * CTE `m_bs`: rows with diff < 0.3; one best-matching row per product, kept
#     only when the resulting price lies within wac_p * 0.8 .. wac_p * 1.3.
#   * Final select: union of both, latest INJECTION_DATE per product, averaged.
# Result columns: product_id, ben_soliman_price.
BEN_SOLIMAN_QUERY = f'''
WITH lower as (
    select distinct product_id, sku, new_d*bs_price as ben_soliman_price, INJECTION_DATE
    from (
        select maxab_product_id as product_id, maxab_sku as sku, INJECTION_DATE, wac1, wac_p,
            (bs_price/bs_unit_count) as bs_price, diff, cu_price,
            case when p1 > 1 then child_quantity else 0 end as scheck,
            round(p1/2)*2 as p1, p2,
            case when (ROUND(p1 / scheck) * scheck) = 0 then p1 else (ROUND(p1 / scheck) * scheck) end as new_d
        from (
            select sm.*, wac1, wac_p, 
                abs((bs_price/bs_unit_count)-(wac_p*maxab_basic_unit_count))/(wac_p*maxab_basic_unit_count) as diff,
                cpc.price as cu_price, pup.child_quantity,
                round((cu_price/(bs_price/bs_unit_count))) as p1, 
                round(((bs_price/bs_unit_count)/cu_price)) as p2
            from materialized_views.savvy_mapping sm 
            join finance.all_cogs f on f.product_id = sm.maxab_product_id 
                and CONVERT_TIMEZONE('{TIMEZONE}', 'Africa/Cairo', CURRENT_TIMESTAMP()) between f.from_Date and f.to_date
            join PACKING_UNIT_PRODUCTS pu on pu.product_id = sm.maxab_product_id and pu.IS_BASIC_UNIT = 1 
            join cohort_product_packing_units cpc on cpc.PRODUCT_PACKING_UNIT_ID = pu.id and cohort_id = 700 
            join packing_unit_products pup on pup.product_id = sm.maxab_product_id and pup.is_basic_unit = 1  
            where bs_price is not null 
                and INJECTION_DATE::date >= CONVERT_TIMEZONE('{TIMEZONE}', 'Africa/Cairo', CURRENT_TIMESTAMP())::date - 5 
                and diff > 0.3 and p1 > 1
        )
    )
    qualify max(INJECTION_DATE) over(partition by product_id) = INJECTION_DATE
),

m_bs as (
    select z.* from (
        select maxab_product_id as product_id, maxab_sku as sku, avg(bs_final_price) as ben_soliman_price, INJECTION_DATE
        from (
            select *, row_number() over(partition by maxab_product_id order by diff) as rnk_2 
            from (
                select *, (bs_final_price-wac_p)/wac_p as diff_2 
                from (
                    select *, bs_price/maxab_basic_unit_count as bs_final_price 
                    from (
                        select *, row_number() over(partition by maxab_product_id, maxab_pu order by diff) as rnk 
                        from (
                            select *, max(INJECTION_DATE::date) over(partition by maxab_product_id, maxab_pu) as max_date
                            from (
                                select sm.*, wac1, wac_p, 
                                    abs(bs_price-(wac_p*maxab_basic_unit_count))/(wac_p*maxab_basic_unit_count) as diff 
                                from materialized_views.savvy_mapping sm 
                                join finance.all_cogs f on f.product_id = sm.maxab_product_id 
                                    and CONVERT_TIMEZONE('{TIMEZONE}', 'Africa/Cairo', CURRENT_TIMESTAMP()) between f.from_Date and f.to_date
                                where bs_price is not null 
                                    and INJECTION_DATE::date >= CONVERT_TIMEZONE('{TIMEZONE}', 'Africa/Cairo', CURRENT_TIMESTAMP())::date - 5 
                                    and diff < 0.3
                            )
                            qualify max_date = INJECTION_DATE
                        ) qualify rnk = 1 
                    )
                ) where diff_2 between -0.5 and 0.5 
            ) qualify rnk_2 = 1 
        ) group by all
    ) z 
    join finance.all_cogs f on f.product_id = z.product_id 
        and CONVERT_TIMEZONE('{TIMEZONE}', 'Africa/Cairo', CURRENT_TIMESTAMP()) between f.from_Date and f.to_date
    where ben_soliman_price between f.wac_p*0.8 and f.wac_p*1.3
)

select product_id, avg(ben_soliman_price) as ben_soliman_price
from (
    select * from (
        select * from m_bs 
        union all
        select * from lower
    )
    qualify max(INJECTION_DATE) over(partition by product_id) = INJECTION_DATE
)
group by all
'''



# Marketplace Prices Query
# =============================================================================
# 2. MARKETPLACE PRICES QUERY (with region fallback)
# =============================================================================
# Built once, at load time, as an f-string with TIMEZONE interpolated.
#   * CTE `MP`: per (region, product_id), the minimum over packing units of the
#     marketplace prices divided by BASIC_UNIT_COUNT; only rows where
#     least(min_price, mod_price) lies within wac_p * 0.9 .. wac_p * 1.3.
#   * CTE `region_mapping`: hard-coded (region, fallback_region) pairs.
#   * CTE `full_data`: every product with activation = 'true' crossed with the
#     six hard-coded regions.
#   * Final select: the region's own price where present, otherwise a price from
#     one of its fallback regions (minimum across fallbacks); rows without any
#     final_min_price are dropped.
# Result columns: region, product_id, final_min_price, final_max_price,
# final_mod_price, final_true_min, final_true_max.
MARKETPLACE_PRICES_QUERY = f'''
WITH MP as (
    select region, product_id,
        min(min_price) as min_price, min(max_price) as max_price,
        min(mod_price) as mod_price, min(true_min) as true_min, min(true_max) as true_max
    from (
        select mp.region, mp.product_id, mp.pu_id,
            min_price/BASIC_UNIT_COUNT as min_price,
            max_price/BASIC_UNIT_COUNT as max_price,
            mod_price/BASIC_UNIT_COUNT as mod_price,
            TRUE_MIN_PRICE/BASIC_UNIT_COUNT as true_min,
            TRUE_MAX_PRICE/BASIC_UNIT_COUNT as true_max
        from materialized_views.marketplace_prices mp 
        join packing_unit_products pup on pup.product_id = mp.product_id and pup.packing_unit_id = mp.pu_id
        join finance.all_cogs f on f.product_id = mp.product_id 
            and CONVERT_TIMEZONE('{TIMEZONE}', 'Africa/Cairo', CURRENT_TIMESTAMP()) between f.from_date and f.to_date
        where least(min_price, mod_price) between wac_p*0.9 and wac_p*1.3 
    )
    group by all 
),

region_mapping AS (
    SELECT * FROM (VALUES
        ('Delta East', 'Delta West'), ('Delta West', 'Delta East'),
        ('Alexandria', 'Cairo'), ('Alexandria', 'Giza'),
        ('Upper Egypt', 'Cairo'), ('Upper Egypt', 'Giza'),
        ('Cairo', 'Giza'), ('Giza', 'Cairo'),
        ('Delta West', 'Cairo'), ('Delta East', 'Cairo'),
        ('Delta West', 'Giza'), ('Delta East', 'Giza')
    ) AS region_mapping(region, fallback_region)
),

all_regions as (
    SELECT * FROM (VALUES
        ('Cairo'), ('Giza'), ('Delta West'), ('Delta East'), ('Upper Egypt'), ('Alexandria')
    ) AS x(region)
),

full_data as (
    select products.id as product_id, ar.region
    from products, all_regions ar
    where activation = 'true'
)

select region, product_id,
    min(final_min_price) as final_min_price, 
    min(final_max_price) as final_max_price,
    min(final_mod_price) as final_mod_price, 
    min(final_true_min) as final_true_min,
    min(final_true_max) as final_true_max
from (
    SELECT distinct w.region, w.product_id,
        COALESCE(m1.min_price, m2.min_price) AS final_min_price,
        COALESCE(m1.max_price, m2.max_price) AS final_max_price,
        COALESCE(m1.mod_price, m2.mod_price) AS final_mod_price,
        COALESCE(m1.true_min, m2.true_min) AS final_true_min,
        COALESCE(m1.true_max, m2.true_max) AS final_true_max
    FROM full_data w
    LEFT JOIN MP m1 ON w.region = m1.region and w.product_id = m1.product_id
    LEFT JOIN region_mapping rm ON w.region = rm.region
    LEFT JOIN MP m2 ON rm.fallback_region = m2.region AND w.product_id = m2.product_id
)
where final_min_price is not null 
group by all
'''


# =============================================================================
# 3. SCRAPPED DATA QUERY (Competitor prices from scraping)
# =============================================================================
# Built once, at load time, as an f-string with TIMEZONE interpolated.
# Reads MATERIALIZED_VIEWS.CLEANED_MARKET_PRICES for the last 7 days, keeps only
# prices within wac_p * 0.8 .. wac_p * 1.3 of the current finance.all_cogs row,
# keeps the latest date per (region, product_id, competitor), then aggregates
# per (product_id, region).
# Result columns: product_id, region, min_scrapped, scrapped25, scrapped50,
# scrapped75, max_scrapped.
SCRAPPED_QUERY = f'''
select product_id, region,
    MIN(market_price) AS min_scrapped,
    PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY market_price) AS scrapped25,
    PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY market_price) AS scrapped50,
    PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY market_price) AS scrapped75,
    MAX(market_price) AS max_scrapped
from (
    select distinct cmp.*, max(date) over(partition by region, cmp.product_id, competitor) as max_date
    from MATERIALIZED_VIEWS.CLEANED_MARKET_PRICES cmp
    join finance.all_cogs f on f.product_id = cmp.product_id 
        and CONVERT_TIMEZONE('{TIMEZONE}', 'Africa/Cairo', CURRENT_TIMESTAMP()) between f.from_date and f.to_date 
    where date >= CONVERT_TIMEZONE('{TIMEZONE}', 'Africa/Cairo', CURRENT_TIMESTAMP())::date - 7 
        and MARKET_PRICE between f.wac_p * 0.8 and wac_p * 1.3
    qualify date = max_date 
)
group by all
'''


# Product Groups Query
# Returns every column of materialized_views.sku_commercial_groups; the code in
# this module only reads `product_id` and `group_id` from the result.
GROUPS_QUERY = '''
SELECT * FROM materialized_views.sku_commercial_groups
'''

print("Market price queries defined ✓")


Market price queries defined ✓


In [3]:
# =============================================================================
# PRICE ANALYSIS HELPER FUNCTIONS
# =============================================================================

def price_analysis(row):
    """
    Analyze prices and calculate percentiles for a product.

    Collects prices from all sources (Ben Soliman, Marketplace, Scrapped),
    keeps the distinct ones inside the acceptable range and summarises them.

    A price ``x`` is accepted when all of the following hold:
      * ``x >= wac / (1 - (avg_margin - 10 * std))``
      * ``x <= wac / (1 - (max_marg + 10 * std))``, where ``max_marg`` is the
        larger of ``avg_margin`` and ``target_margin``
      * ``x >= wac * (0.9 + 0.7 * target_margin)``

    ``avg_margin`` falls back to ``target_margin`` when it is below 0.01 (or
    NaN), and ``std`` is floored at 0.0025.

    Args:
        row: Mapping-like row (DataFrame row or dict) with ``wac_p``,
            ``avg_margin``, ``std``, ``target_margin`` and, optionally, the
            price-source columns. Missing price columns are ignored.

    Returns:
        tuple: (minimum, percentile_25, percentile_50, percentile_75, maximum),
        or five NaNs when no price is acceptable.
    """
    wac = row['wac_p']
    target_margin = row['target_margin']
    avg_margin = row['avg_margin'] if row['avg_margin'] >= 0.01 else target_margin
    std = np.maximum(row['std'], 0.0025)
    max_marg = np.maximum(avg_margin, target_margin)

    # The bounds do not depend on the candidate price, so compute them once.
    lower_bound = wac / (1 - (avg_margin - 10 * std))
    floor_price = wac * (0.9 + target_margin * 0.7)
    # A margin cap of 100% or more corresponds to an unbounded price. Dividing
    # by the non-positive denominator would yield a negative (or infinite)
    # bound and silently reject every price.
    upper_denominator = 1 - (max_marg + 10 * std)
    upper_bound = wac / upper_denominator if upper_denominator > 0 else np.inf

    source_columns = (
        'ben_soliman_price',
        'final_min_price',
        'final_mod_price',
        'final_max_price',
        'final_true_min',
        'final_true_max',
        'min_scrapped',
        'scrapped25',
        'scrapped50',
        'scrapped75',
        'max_scrapped',
    )

    valid_prices = set()
    for column in source_columns:
        raw = row.get(column)
        # Test for missing values before any truthiness check: bool(pd.NA) raises.
        if raw is None or pd.isna(raw):
            continue
        # Database drivers may hand back decimal.Decimal; np.percentile needs floats.
        price = float(raw)
        if price == 0:
            continue
        if lower_bound <= price <= upper_bound and price >= floor_price:
            valid_prices.add(price)

    if not valid_prices:
        return np.nan, np.nan, np.nan, np.nan, np.nan

    prices = sorted(valid_prices)
    return (
        prices[0],
        np.percentile(prices, 25),
        np.percentile(prices, 50),
        np.percentile(prices, 75),
        prices[-1]
    )


def calculate_step_bounds(row):
    """
    Calculate below/above market bounds based on price steps.

    Looks at the four gaps between consecutive market price tiers, keeps the
    gaps that are small relative to cost (``gap / wac <= 1.2 * std``) and uses
    their mean as the step. When no gap qualifies, the step falls back to
    ``min(2 * std, 0.2 * target_margin) * wac``.

    The below-market price is one step under the minimum, unless that would
    drop below ``wac``, in which case the minimum itself is returned. The
    above-market price is one step over the maximum.

    Args:
        row: Mapping-like row with ``wac_p``, ``std``, ``minimum``,
            ``percentile_25``, ``percentile_50``, ``percentile_75``,
            ``maximum`` and optionally ``target_margin`` (default 0.05).

    Returns:
        tuple: (below_market_price, above_market_price)
    """
    wac = row['wac_p']
    std = row['std']
    target_margin = row.get('target_margin', 0.05)

    prices = [
        row['minimum'],
        row['percentile_25'],
        row['percentile_50'],
        row['percentile_75'],
        row['maximum']
    ]

    # Gaps between neighbouring tiers that are small enough to count as a step.
    valid_steps = [
        upper - lower
        for lower, upper in zip(prices, prices[1:])
        if (upper - lower) / wac <= std * 1.2
    ]

    if valid_steps:
        avg_step = np.mean(valid_steps)
    else:
        # std and target_margin are fractions of cost, while the step is applied
        # to prices: scale by wac so the fallback is in price units as well.
        avg_step = min(2 * std, 0.2 * target_margin) * wac

    below = prices[0] - avg_step
    above = prices[-1] + avg_step
    new_min = below if below >= wac else prices[0]
    new_max = above if above >= wac else prices[-1]

    return new_min, new_max


def weighted_median(series, weights):
    """
    Weighted median of ``series`` using ``weights``.

    Pairs in which either the value or the weight is missing are ignored. The
    remaining values are sorted, and the first value whose running weight
    total reaches half of the total weight is returned.

    Args:
        series: pd.Series of values
        weights: pd.Series of weights (e.g., NMV contribution)

    Returns:
        Weighted median value or NaN if no valid data
    """
    usable = series.notna() & weights.notna()
    values = series[usable]
    value_weights = weights[usable]
    if values.empty:
        return np.nan

    # Sort both series by value so the running weight total follows value order.
    ranking = np.argsort(values)
    values = values.iloc[ranking]
    value_weights = value_weights.iloc[ranking]

    running_total = np.cumsum(value_weights)
    half_weight = value_weights.sum() / 2
    position = np.searchsorted(running_total, half_weight)
    return values.iloc[position]


def fill_missing_prices_from_groups(df: pd.DataFrame) -> pd.DataFrame:
    """
    Fill missing market prices using group-level aggregation.

    Fetches product groups from Snowflake, attaches ``group_id`` to ``df`` and,
    for products that belong to a group, computes a weighted median of each
    price column per group (weights: NMV share within the group, or equal
    weights when there is no ``nmv`` column). Missing prices are then filled
    from the group value; existing prices are never overwritten.

    Groups are formed per ``(group_id, cohort_id)`` when ``df`` has a
    ``cohort_id`` column and per ``group_id`` otherwise.

    Args:
        df: DataFrame with ``product_id`` and any of the market price columns.
            An existing ``group_id`` column is replaced by the fetched one.

    Returns:
        DataFrame with ``group_id`` added and missing prices filled from group
        data. On the early-exit paths the frame is returned without filling.
    """
    # Price columns to process
    price_cols = [
        'ben_soliman_price', 'final_min_price', 'final_max_price',
        'final_mod_price', 'final_true_min', 'final_true_max',
        'min_scrapped', 'scrapped25', 'scrapped50', 'scrapped75', 'max_scrapped'
    ]

    # Fetch product groups from Snowflake
    df_groups = fetch_product_groups()

    if len(df_groups) == 0:
        print("  No product groups found in database")
        return df

    # Merge groups with df (drop existing group_id if any)
    # NOTE(review): assumes one group row per product_id; duplicate product_ids
    # in the groups table would duplicate rows here - confirm upstream.
    df = df.drop(columns=['group_id'], errors='ignore')
    df = df.merge(
        df_groups[['product_id', 'group_id']],
        on='product_id',
        how='left'
    )

    # Get products with groups
    groups_data = df[df['group_id'].notna()].copy()

    if len(groups_data) == 0:
        print("  No products with groups found, skipping group processing")
        return df

    print(f"  Processing {len(groups_data)} products with groups...")

    # cohort_id is not among the columns callers are required to supply, so it
    # only takes part in the grouping when it is actually present.
    group_keys = ['group_id']
    if 'cohort_id' in df.columns:
        group_keys.append('cohort_id')

    # Calculate NMV contribution within each group
    # Use 'nmv' if available, otherwise use equal weights
    if 'nmv' in groups_data.columns:
        group_nmv = groups_data.groupby(group_keys)['nmv'].transform('sum')
        groups_data['cntrb'] = (groups_data['nmv'] / group_nmv).fillna(1)
    else:
        groups_data['cntrb'] = 1

    # Only rows with at least one known price can contribute to a group value.
    available_price_cols = [c for c in price_cols if c in groups_data.columns]
    has_price = groups_data[available_price_cols].notna().any(axis=1)
    priced = groups_data.loc[has_price]

    if priced.empty:
        print("  No group aggregations computed")
        return df

    # Select the needed columns before apply so the aggregation never operates
    # on the grouping columns themselves.
    groups_agg = (
        priced
        .groupby(group_keys)[available_price_cols + ['cntrb']]
        .apply(lambda g: pd.Series({
            col: weighted_median(g[col], g['cntrb']) for col in available_price_cols
        }))
        .reset_index()
    )

    if len(groups_agg) == 0:
        print("  No group aggregations computed")
        return df

    # Fill missing prices with group-level prices
    merged = df.merge(groups_agg, on=group_keys, how='left', suffixes=('', '_group'))

    for col in available_price_cols:
        group_col = f'{col}_group'
        if group_col in merged.columns:
            merged[col] = merged[col].fillna(merged[group_col])

    # Drop the helper columns that carried the group-level values
    group_cols_to_drop = [f'{c}_group' for c in available_price_cols if f'{c}_group' in merged.columns]
    merged = merged.drop(columns=group_cols_to_drop, errors='ignore')

    print(f"  Group processing complete")
    return merged


print("Price analysis helper functions defined ✓")


Price analysis helper functions defined ✓


In [4]:
# =============================================================================
# DATA FETCHING FUNCTIONS
# =============================================================================

def fetch_ben_soliman_prices():
    """Run ``BEN_SOLIMAN_QUERY`` on Snowflake and return the result as a DataFrame.

    Progress and the number of loaded rows are printed to stdout.
    """
    print("  Fetching Ben Soliman prices...")
    prices = query_snowflake(BEN_SOLIMAN_QUERY)
    row_count = len(prices)
    print(f"    Loaded {row_count} records")
    return prices


def fetch_marketplace_prices():
    """Fetch marketplace prices from Snowflake.

    Runs ``MARKETPLACE_PRICES_QUERY`` and prints progress and the number of
    loaded rows to stdout.

    Returns:
        pd.DataFrame with the query result.
    """
    print("  Fetching marketplace prices...")
    # The constant defined in this module is MARKETPLACE_PRICES_QUERY; the
    # previously referenced MARKETPLACE_QUERY does not exist (NameError).
    df = query_snowflake(MARKETPLACE_PRICES_QUERY)
    print(f"    Loaded {len(df)} records")
    return df


def fetch_scrapped_prices():
    """Run ``SCRAPPED_QUERY`` on Snowflake and return the result as a DataFrame.

    Progress and the number of loaded rows are printed to stdout.
    """
    print("  Fetching scrapped prices...")
    scrapped = query_snowflake(SCRAPPED_QUERY)
    row_count = len(scrapped)
    print(f"    Loaded {row_count} records")
    return scrapped


def fetch_product_groups():
    """Run ``GROUPS_QUERY`` on Snowflake and return the result as a DataFrame.

    Progress and the number of loaded rows are printed to stdout.
    """
    print("  Fetching product groups...")
    groups = query_snowflake(GROUPS_QUERY)
    row_count = len(groups)
    print(f"    Loaded {row_count} records")
    return groups


print("Data fetching functions defined ✓")


Data fetching functions defined ✓


In [None]:
# =============================================================================
# MAIN PROCESSING FUNCTIONS
# =============================================================================

def get_market_prices(df_base: pd.DataFrame) -> pd.DataFrame:
    """
    Fetch and process all market prices.

    Merges Ben Soliman, Marketplace, and Scrapped prices into a copy of the
    base dataframe, fills gaps from product groups, derives price percentiles
    per row and converts them to margins relative to ``wac_p``.

    Args:
        df_base: DataFrame with product_id, wac_p, avg_margin, std and
                 target_margin columns. ``region`` is used when present;
                 otherwise it is looked up from ``warehouse_id``, and when
                 neither column exists every row is assigned 'Cairo'.

    Returns:
        DataFrame with product_id, warehouse_id (when present) and:
        - minimum, percentile_25, percentile_50, percentile_75, maximum (prices)
        - below_market, market_min, market_25, market_50, market_75,
          market_max, above_market
          (These are MARGIN values, not prices)

    Raises:
        ValueError: if a required column is missing from ``df_base``.
    """
    print("\n" + "="*60)
    print("FETCHING MARKET PRICES")
    print("="*60)

    df = df_base.copy()

    # Ensure required columns exist
    required_cols = ['product_id', 'wac_p', 'avg_margin', 'std', 'target_margin']
    missing = [c for c in required_cols if c not in df.columns]
    if missing:
        raise ValueError(f"Missing required columns: {missing}")

    # 1. Fetch all market price sources
    df_bs = fetch_ben_soliman_prices()
    df_mp = fetch_marketplace_prices()
    df_scrapped = fetch_scrapped_prices()

    # 2. Merge Ben Soliman prices (product level)
    df = df.merge(
        df_bs[['product_id', 'ben_soliman_price']],
        on='product_id',
        how='left'
    )

    # 3. Marketplace and scrapped prices are per region
    if 'region' not in df.columns:
        if 'warehouse_id' in df.columns:
            # Only query the warehouses table when its result can be used.
            REGION_QUERY = '''
        SELECT id as warehouse_id, region 
        FROM warehouses 
        WHERE is_active = TRUE
        '''
            print("  Fetching warehouse regions...")
            df_regions = query_snowflake(REGION_QUERY)
            df = df.merge(df_regions, on='warehouse_id', how='left')
        else:
            # Region names are capitalised in the marketplace query ('Cairo',
            # 'Giza', ...); a lower-case default would never match on merge.
            df['region'] = 'Cairo'

    # 4. Merge marketplace prices
    df = df.merge(
        df_mp[['product_id', 'region', 'final_min_price', 'final_max_price',
               'final_mod_price', 'final_true_min', 'final_true_max']],
        on=['product_id', 'region'],
        how='left'
    )

    # 5. Merge scrapped prices
    df = df.merge(
        df_scrapped[['product_id', 'region', 'min_scrapped', 'scrapped25',
                     'scrapped50', 'scrapped75', 'max_scrapped']],
        on=['product_id', 'region'],
        how='left'
    )

    # 6. Fill missing prices from group-level data
    print("\n  Processing group-level prices...")
    df = fill_missing_prices_from_groups(df)

    # 7. Apply price analysis to get percentiles
    # Column-by-column assignment from plain lists also works for an empty frame.
    print("\n  Calculating market price percentiles...")
    percentile_cols = ['minimum', 'percentile_25', 'percentile_50', 'percentile_75', 'maximum']
    stats = [price_analysis(row) for _, row in df.iterrows()]
    for position, column in enumerate(percentile_cols):
        df[column] = [values[position] for values in stats]

    # 8. Calculate below/above market bounds
    # Only for rows with valid percentiles; the others stay NaN.
    bound_cols = ['below_market_price', 'above_market_price']
    for column in bound_cols:
        df[column] = np.nan
    mask = df['minimum'].notna()
    if mask.any():
        bounds = [calculate_step_bounds(row) for _, row in df[mask].iterrows()]
        # Assign a raw array: assigning a DataFrame through .loc aligns on
        # column labels, and the 0/1 labels produced by an expanded apply do
        # not match the target names, which would write NaN everywhere.
        df.loc[mask, bound_cols] = np.asarray(bounds, dtype=float)

    # 9. Convert prices to margins
    print("  Converting prices to margins...")

    # margin = (price - wac) / price
    margin_sources = {
        'below_market': 'below_market_price',
        'market_min': 'minimum',
        'market_25': 'percentile_25',
        'market_50': 'percentile_50',
        'market_75': 'percentile_75',
        'market_max': 'maximum',
        'above_market': 'above_market_price',
    }
    for margin_col, price_col in margin_sources.items():
        df[margin_col] = (df[price_col] - df['wac_p']) / df[price_col]

    # 10. Select only the market columns to return
    market_cols = [
        'product_id', 'warehouse_id',
        # Raw prices
        'minimum', 'percentile_25', 'percentile_50', 'percentile_75', 'maximum',
        # Margin tiers
        'below_market', 'market_min', 'market_25', 'market_50',
        'market_75', 'market_max', 'above_market'
    ]

    # Keep only columns that exist
    market_cols = [c for c in market_cols if c in df.columns]

    print(f"\n  Market prices processed for {len(df)} records")
    print(f"  Columns: {market_cols}")

    return df[market_cols]


print("get_market_prices function defined ✓")


In [None]:
# =============================================================================
# MARGIN TIERS CALCULATION
# =============================================================================

def calculate_margin_tiers(df_base: pd.DataFrame) -> pd.DataFrame:
    """
    Build the eight margin tier columns for every row of ``df_base``.

    Tiers, with ``m`` = effective_min_margin, ``M`` = max_boundary and
    ``s`` = margin_step:
    - margin_tier_below: m - s
    - margin_tier_1 .. margin_tier_4: m, m + s, m + 2s, m + 3s
    - margin_tier_5: M
    - margin_tier_above_1, margin_tier_above_2: M + s, M + 2s

    Missing inputs are derived as follows: ``margin_step`` = 2 * std (std
    defaulting to 0.01 where null), ``effective_min_margin`` =
    target_margin - margin_step, ``max_boundary`` = target_margin +
    4 * margin_step.

    Args:
        df_base: DataFrame with product_id and either the three tier inputs
                 or the ``std`` / ``target_margin`` columns needed to derive
                 them. The input frame is not modified.

    Returns:
        DataFrame holding product_id, warehouse_id (when present) and the
        eight margin tier columns.
    """
    banner = "=" * 60
    print("\n" + banner)
    print("CALCULATING MARGIN TIERS")
    print(banner)

    df = df_base.copy()

    # Report what is missing, then derive defaults for the tier inputs.
    expected = ('product_id', 'effective_min_margin', 'max_boundary', 'margin_step')
    absent = [name for name in expected if name not in df.columns]
    if absent:
        print(f"  ⚠️ Missing columns for margin tiers: {absent}")
        print("  Using avg_margin to create default tiers...")

        # Order matters: the two boundaries are derived from margin_step.
        if 'margin_step' not in df.columns:
            df['margin_step'] = df['std'].fillna(0.01) * 2

        if 'effective_min_margin' not in df.columns:
            df['effective_min_margin'] = df['target_margin'] - df['margin_step']

        if 'max_boundary' not in df.columns:
            df['max_boundary'] = df['target_margin'] + (4 * df['margin_step'])

    print("  Creating margin tier structure...")

    floor = df['effective_min_margin']
    ceiling = df['max_boundary']
    step = df['margin_step']

    tiers = {
        'margin_tier_below': floor - step,
        'margin_tier_1': floor,
        'margin_tier_2': floor + step,
        'margin_tier_3': floor + 2 * step,
        'margin_tier_4': floor + 3 * step,
        'margin_tier_5': ceiling,
        'margin_tier_above_1': ceiling + step,
        'margin_tier_above_2': ceiling + 2 * step,
    }
    for tier_name, tier_values in tiers.items():
        df[tier_name] = tier_values

    # Identifier columns first (when present), then the tiers in order.
    wanted = ['product_id', 'warehouse_id', *tiers]
    margin_tier_cols = [name for name in wanted if name in df.columns]

    print(f"\n  Margin tiers calculated for {len(df)} records")
    summary_lines = (
        "  Tier structure:",
        "    margin_tier_below:   effective_min - step",
        "    margin_tier_1:       effective_min_margin",
        "    margin_tier_2:       effective_min + 1*step",
        "    margin_tier_3:       effective_min + 2*step",
        "    margin_tier_4:       effective_min + 3*step",
        "    margin_tier_5:       max_boundary",
        "    margin_tier_above_1: max_boundary + 1*step",
        "    margin_tier_above_2: max_boundary + 2*step",
    )
    for line in summary_lines:
        print(line)

    return df[margin_tier_cols]


print("calculate_margin_tiers function defined ✓")


In [None]:
# =============================================================================
# COMBINED FUNCTION - GET ALL MARKET AND MARGIN DATA
# =============================================================================

def _merge_refreshed_columns(df, df_new, refreshed_cols):
    """Swap ``refreshed_cols`` in ``df`` for the values in ``df_new``, joined on product_id (+ warehouse_id)."""
    merge_keys = ['product_id']
    if 'warehouse_id' in df_new.columns and 'warehouse_id' in df.columns:
        merge_keys.append('warehouse_id')

    # Drop the stale copies first so the merge does not create _x/_y columns.
    df = df.drop(columns=[c for c in refreshed_cols if c in df.columns])

    # A left merge multiplies rows when the right side repeats a key. Keep the
    # first row per key so the output has exactly one row per input row.
    repeated = df_new.duplicated(subset=merge_keys)
    if repeated.any():
        print(f"  ⚠️ {int(repeated.sum())} rows with repeated {merge_keys} ignored in merge")
        df_new = df_new[~repeated]

    return df.merge(df_new, on=merge_keys, how='left')


def get_market_and_margin_data(df_base: pd.DataFrame) -> pd.DataFrame:
    """
    Combined function to fetch fresh market prices and calculate margin tiers.

    This is the main entry point for getting all market/margin data refreshed.
    Each of the two steps is best-effort: if a step raises, the error is
    printed and the frame is returned without that step's columns refreshed.

    Args:
        df_base: DataFrame with at least:
            - product_id, warehouse_id
            - wac_p, avg_margin, std, target_margin
            - effective_min_margin, max_boundary, margin_step (optional)

    Returns:
        A copy of ``df_base`` with market prices and margin tiers added:

        Market Prices (as margins):
            - below_market, market_min, market_25, market_50, market_75, market_max, above_market

        Price Percentiles (as EGP):
            - minimum, percentile_25, percentile_50, percentile_75, maximum

        Margin Tiers:
            - margin_tier_below, margin_tier_1, margin_tier_2, margin_tier_3,
              margin_tier_4, margin_tier_5, margin_tier_above_1, margin_tier_above_2

    Usage:
        %run market_data_module.ipynb
        df_with_market = get_market_and_margin_data(df_input)
    """
    print("\n" + "="*70)
    print("REFRESHING MARKET PRICES AND MARGIN TIERS")
    print("="*70)
    print(f"Timestamp: {datetime.now(CAIRO_TZ).strftime('%Y-%m-%d %H:%M:%S')} Cairo time")
    print(f"Input records: {len(df_base)}")

    df = df_base.copy()

    # =================================
    # STEP 1: Get Market Prices
    # =================================
    market_value_cols = [
        'below_market', 'market_min', 'market_25', 'market_50',
        'market_75', 'market_max', 'above_market',
        'minimum', 'percentile_25', 'percentile_50', 'percentile_75', 'maximum'
    ]
    try:
        df_market = get_market_prices(df)
        df = _merge_refreshed_columns(df, df_market, market_value_cols)
        print(f"\n  ✓ Market prices merged")

    except Exception as e:
        # Deliberate best-effort: keep going with whatever columns df already has.
        print(f"\n  ⚠️ Error fetching market prices: {e}")
        print("  Continuing without market price refresh...")

    # =================================
    # STEP 2: Calculate Margin Tiers
    # =================================
    tier_value_cols = [
        'margin_tier_below', 'margin_tier_1', 'margin_tier_2', 'margin_tier_3',
        'margin_tier_4', 'margin_tier_5', 'margin_tier_above_1', 'margin_tier_above_2'
    ]
    try:
        df_tiers = calculate_margin_tiers(df)
        df = _merge_refreshed_columns(df, df_tiers, tier_value_cols)
        print(f"\n  ✓ Margin tiers merged")

    except Exception as e:
        # Deliberate best-effort, as above.
        print(f"\n  ⚠️ Error calculating margin tiers: {e}")
        print("  Continuing without margin tier refresh...")

    # =================================
    # SUMMARY
    # =================================
    print("\n" + "="*60)
    print("MARKET DATA REFRESH COMPLETE")
    print("="*60)
    print(f"Output records: {len(df)}")

    # List available market/margin columns
    market_cols = [c for c in df.columns if 'market' in c.lower() or 'margin_tier' in c.lower() or 'percentile' in c.lower()]
    print(f"Market/Margin columns available: {len(market_cols)}")
    total_rows = len(df)
    for col in market_cols:
        non_null = df[col].notna().sum()
        # Avoid dividing by zero when the input frame is empty.
        share = non_null / total_rows * 100 if total_rows else 0.0
        print(f"  - {col}: {non_null} non-null values ({share:.1f}%)")

    return df


print("get_market_and_margin_data function defined ✓")
print("\n" + "="*70)
print("MARKET DATA MODULE READY")
print("="*70)
print("Available functions:")
print("  - get_market_prices(df_base)      : Fetch market prices only")
print("  - calculate_margin_tiers(df_base) : Calculate margin tiers only")
print("  - get_market_and_margin_data(df)  : Combined refresh (recommended)")
print("="*70)
