# Queries Module - Fresh Data Queries

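Helper functions that re-query live data (stocks, prices, WAC, cart rules, sales performance) for use in Module 3. Market prices are not refreshed here; they come from the MODULE_1_INPUT data.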

```python
%run queries_module.ipynb

df_stocks = get_current_stocks()
df_prices = get_current_prices()
df_wac = get_current_wac()
df_cart = get_current_cart_rules()
```


In [None]:
# =============================================================================
# IMPORTS & SNOWFLAKE CONNECTION
# =============================================================================
import pandas as pd
import snowflake.connector
import os
import sys

# Add parent directory to path to import setup_environment_2
sys.path.append('..')
import setup_environment_2

# Initialize environment variables (loads Snowflake credentials)
setup_environment_2.initialize_env()

def query_snowflake(query, warehouse="COMPUTE_WH"):
    """Execute a query on Snowflake and return the results as a DataFrame.

    A new connection is opened for every call from the SNOWFLAKE_USERNAME,
    SNOWFLAKE_ACCOUNT, SNOWFLAKE_PASSWORD and SNOWFLAKE_DATABASE environment
    variables. Both the cursor and the connection are closed before
    returning, including when the query raises.

    Args:
        query: SQL text to execute.
        warehouse: Warehouse selected with ``USE WAREHOUSE`` before the query
            runs. Defaults to ``COMPUTE_WH``, the value that used to be
            hard-coded. The name is interpolated verbatim, so it must come
            from trusted code.

    Returns:
        pandas.DataFrame with one row per fetched row and lower-cased column
        names taken from the cursor description.

    Raises:
        KeyError: If one of the required environment variables is not set.
    """
    con = snowflake.connector.connect(
        user=os.environ["SNOWFLAKE_USERNAME"],
        account=os.environ["SNOWFLAKE_ACCOUNT"],
        password=os.environ["SNOWFLAKE_PASSWORD"],
        database=os.environ["SNOWFLAKE_DATABASE"]
    )
    try:
        cur = con.cursor()
        try:
            cur.execute(f"USE WAREHOUSE {warehouse}")
            cur.execute(query)
            data = cur.fetchall()
            # Lower-case the column names so callers can rely on one casing.
            columns = [desc[0].lower() for desc in cur.description]
        finally:
            # Release the cursor explicitly instead of relying on con.close().
            cur.close()
        return pd.DataFrame(data, columns=columns)
    finally:
        con.close()

def get_snowflake_timezone():
    """Return the value reported by ``SHOW PARAMETERS LIKE 'TIMEZONE'``.

    Falls back to "UTC" when the statement returns no rows.
    """
    params = query_snowflake("SHOW PARAMETERS LIKE 'TIMEZONE'")
    if len(params) == 0:
        return "UTC"
    return params["value"][0]

# Resolved once when this cell runs; the f-string queries in this file pass it
# as the source zone to CONVERT_TIMEZONE(..., 'Africa/Cairo', ...).
TIMEZONE = get_snowflake_timezone()
print(f"Queries Module | Timezone: {TIMEZONE}")


In [None]:
# =============================================================================
# QUERY 1: CURRENT STOCKS (from data_extraction.ipynb)
# =============================================================================
# Available stock (cast to INTEGER) per warehouse_id / product_id, restricted
# to rows with is_basic_unit = 1 and skipping warehouses 6, 9 and 10.
STOCK_QUERY = '''
SELECT 
    pw.warehouse_id,
    pw.product_id,
    pw.available_stock::INTEGER AS stocks
FROM product_warehouse pw
WHERE pw.warehouse_id NOT IN (6, 9, 10)
    AND pw.is_basic_unit = 1
'''

def get_current_stocks():
    """Run STOCK_QUERY on Snowflake and return the result as a DataFrame.

    Prints a progress line before the query and the row count after it.
    """
    print("Fetching current stocks...")
    stocks = query_snowflake(STOCK_QUERY)
    row_count = len(stocks)
    print(f"  Loaded {row_count} records")
    return stocks


In [None]:
# =============================================================================
# QUERY: ACTIVE QUANTITY DISCOUNTS
# =============================================================================
# Stored under a dedicated constant rather than the generic global `query`:
# any later cell (or another notebook loaded with %run) that rebinds `query`
# would otherwise silently change what get_active_qd_now() executes.
ACTIVE_QD_QUERY = f'''
SELECT DISTINCT
    qd.id AS discount_id,
    qd.dynamic_tag_id AS tag_id,
    qd.start_at,
    qd.end_at
FROM quantity_discounts qd
WHERE qd.active = TRUE
    AND CONVERT_TIMEZONE('{TIMEZONE}', 'Africa/Cairo', CURRENT_TIMESTAMP())
        BETWEEN qd.start_at AND qd.end_at
'''

# Backward-compatible alias for code that still reads the old global name.
# get_active_qd_now() no longer depends on it.
query = ACTIVE_QD_QUERY


def get_active_qd_now():
    """Get the quantity discounts that are active at the current Cairo time.

    Returns:
        DataFrame with discount_id, tag_id, start_at and end_at for every
        quantity_discounts row with active = TRUE whose start_at/end_at range
        contains the current timestamp converted to Africa/Cairo.
    """
    print("Fetching  qd ...")
    df = query_snowflake(ACTIVE_QD_QUERY)
    print(f"  Loaded {len(df)} records")
    return df

In [None]:
# =============================================================================
# QUERY: PACKING UNITS
# =============================================================================
# sales_check: NMV per (product, packing unit) over the last 60 days for
# telesales/retailer orders, excluding order statuses 7 and 12.
# The GROUP BY already yields one row per (product_id, packing_unit_id), so no
# DISTINCT is needed on that CTE.
# NOTE(review): when a product has several packing units with
# basic_unit_count = 1 and none of them has sales, nmv and top_nmv are both
# NULL, so `nmv = top_nmv` is NULL and every row of that product is filtered
# out - confirm this is intended.
packing_units_QUERY = '''
with sales_check as (
    SELECT
        pso.product_id,
        pso.PACKING_UNIT_id,
        sum(pso.total_price) as nmv
    FROM product_sales_order pso
    JOIN sales_orders so ON so.id = pso.sales_order_id
    WHERE so.created_at >= current_date - 60
        AND so.sales_order_status_id not in (7,12)
        AND so.channel IN ('telesales','retailer')
        AND pso.purchased_item_count <> 0
    GROUP BY 1,2
)
select product_id,packing_unit_id,basic_unit_count
from (
    select *,max(nmv)over(partition by product_id,is_basic_unit) as top_nmv
    from (
        select
            pup.product_id,
            pup.PACKING_UNIT_id,
            pup.basic_unit_count,
            pup.is_basic_unit,
            count(distinct case when pup.basic_unit_count = 1 then pup.PACKING_UNIT_id end) over(partition by pup.product_id) as total_basic,
            nmv
        from PACKING_UNIT_PRODUCTS pup
        left join sales_check sc on pup.product_id = sc.product_id and pup.PACKING_UNIT_id = sc.PACKING_UNIT_id
        where pup.deleted_at is null
    )
    qualify case when total_basic > 1 then nmv = top_nmv else true end
)
'''


def get_packing_units():
    """Get the packing units of each product.

    Returns:
        DataFrame with product_id, packing_unit_id and basic_unit_count for
        non-deleted PACKING_UNIT_PRODUCTS rows. For products that have more
        than one packing unit with basic_unit_count = 1, only the rows whose
        60-day NMV equals the highest NMV within their
        (product_id, is_basic_unit) group are kept.
    """
    print("Fetching packing_units ...")
    df = query_snowflake(packing_units_QUERY)
    print(f"  Loaded {len(df)} records")
    return df


In [None]:
# =============================================================================
# QUERY 2: CURRENT PRICES (from data_extraction.ipynb)
# =============================================================================
# Current price per (region, cohort_id, product_id) for packing units with
# basic_unit_count = 1.
#   local_prices: average customized cohort price, with cohorts mapped to regions.
#   live_prices:  materialized_views.DBDP_PRICES rows for today's Cairo date whose
#                 time_slot start hour matches the current Cairo hour (or the hour before).
#   prices:       per (region, cohort, product, packing unit) keep the live row
#                 when one exists (priority 1), otherwise the local row (priority 2).
# Product 1309 is special-cased to packing_unit_id = 2.
CURRENT_PRICES_QUERY = f'''
WITH skus_prices AS (
    WITH local_prices AS (
        SELECT  
            CASE 
                WHEN cpu.cohort_id IN (700, 695) THEN 'Cairo'
                WHEN cpu.cohort_id IN (701) THEN 'Giza'
                WHEN cpu.cohort_id IN (704, 698) THEN 'Delta East'
                WHEN cpu.cohort_id IN (703, 697) THEN 'Delta West'
                WHEN cpu.cohort_id IN (696, 1123, 1124, 1125, 1126) THEN 'Upper Egypt'
                WHEN cpu.cohort_id IN (702, 699) THEN 'Alexandria'
            END AS region,
            cohort_id,
            pu.product_id,
            pu.packing_unit_id,
            pu.basic_unit_count,
            AVG(cpu.price) AS price
        FROM cohort_product_packing_units cpu
        JOIN PACKING_UNIT_PRODUCTS pu ON pu.id = cpu.product_packing_unit_id
        WHERE cpu.cohort_id IN (700,701,702,703,704,695,696,697,698,699,1123,1124,1125,1126)
            AND cpu.created_at::date <> '2023-07-31'
            AND cpu.is_customized = TRUE
        GROUP BY ALL
    ),
    
    live_prices AS (
        SELECT 
            region, cohort_id, product_id, 
            pu_id AS packing_unit_id, 
            buc AS basic_unit_count, 
            NEW_PRICE AS price
        FROM materialized_views.DBDP_PRICES
        WHERE created_at = CONVERT_TIMEZONE('{TIMEZONE}', 'Africa/Cairo', CURRENT_TIMESTAMP())::date
            AND DATE_PART('hour', CONVERT_TIMEZONE('{TIMEZONE}', 'Africa/Cairo', CURRENT_TIMESTAMP())::time) 
                BETWEEN SPLIT_PART(time_slot, '-', 1)::int AND (SPLIT_PART(time_slot, '-', 1)::int) + 1
            AND cohort_id IN (700,701,702,703,704,695,696,697,698,699,1123,1124,1125,1126)
    ),
    
    prices AS (
        SELECT *
        FROM (
            SELECT *, 1 AS priority FROM live_prices
            UNION ALL
            SELECT *, 2 AS priority FROM local_prices
        )
        QUALIFY ROW_NUMBER() OVER (PARTITION BY region, cohort_id, product_id, packing_unit_id ORDER BY priority) = 1
    )
    
    SELECT region, cohort_id, product_id, price
    FROM prices
    WHERE basic_unit_count = 1
        AND ((product_id = 1309 AND packing_unit_id = 2) OR (product_id <> 1309))
)

SELECT distinct region, cohort_id, product_id, price as current_price
FROM skus_prices
'''

def get_current_prices():
    """Run CURRENT_PRICES_QUERY on Snowflake and return the result as a DataFrame.

    Prints a progress line before the query and the row count after it.
    """
    print("Fetching current prices...")
    prices = query_snowflake(CURRENT_PRICES_QUERY)
    row_count = len(prices)
    print(f"  Loaded {row_count} records")
    return prices


In [None]:
# =============================================================================
# QUERY 3: CURRENT WAC (from data_extraction.ipynb)
# =============================================================================
# product_id / wac_p rows from finance.all_cogs whose from_date..to_date range
# contains the current timestamp converted to Africa/Cairo.
WAC_QUERY = f'''
SELECT 
    product_id,
    wac_p
FROM finance.all_cogs
WHERE CONVERT_TIMEZONE('{TIMEZONE}', 'Africa/Cairo', CURRENT_TIMESTAMP()) 
    BETWEEN from_date AND to_date
'''

def get_current_wac():
    """Run WAC_QUERY on Snowflake and return product_id / wac_p rows.

    WAC stands for Weighted Average Cost. Prints a progress line before the
    query and the row count after it.
    """
    print("Fetching current WAC...")
    wac = query_snowflake(WAC_QUERY)
    row_count = len(wac)
    print(f"  Loaded {row_count} records")
    return wac


In [None]:
# =============================================================================
# QUERY 4: CURRENT CART RULES (from data_extraction.ipynb)
# =============================================================================
# Cohorts whose cart rules are fetched.
# NOTE(review): this list omits cohorts 695-699, which CURRENT_PRICES_QUERY does
# include - confirm the difference is intentional.
COHORT_IDS = [700, 701, 702, 703, 704, 1123, 1124, 1125, 1126]

# MAX_PER_SALES_ORDER per cohort / product for packing units with
# basic_unit_count = 1. When the cohort's own value is NULL, the value of the
# same packing unit in the cohort's FALLBACK_COHORT_ID is used instead.
CART_RULES_QUERY = f'''
SELECT 
    cppu.cohort_id,
    pup.product_id,
    pup.basic_unit_count,
    COALESCE(cppu.MAX_PER_SALES_ORDER, cppu2.MAX_PER_SALES_ORDER) AS current_cart_rule
FROM COHORT_PRODUCT_PACKING_UNITS cppu 
JOIN PACKING_UNIT_PRODUCTS pup ON cppu.PRODUCT_PACKING_UNIT_ID = pup.id 
JOIN cohorts c ON c.id = cppu.cohort_id
LEFT JOIN COHORT_PRODUCT_PACKING_UNITS cppu2 
    ON cppu.PRODUCT_PACKING_UNIT_ID = cppu2.PRODUCT_PACKING_UNIT_ID 
    AND cppu2.cohort_id = c.FALLBACK_COHORT_ID
WHERE cppu.cohort_id IN ({",".join(map(str, COHORT_IDS))})
    AND pup.basic_unit_count = 1
'''

def get_current_cart_rules():
    """Fetch cart rules and reduce them to one row per cohort and product.

    Runs CART_RULES_QUERY on Snowflake, then keeps the minimum
    current_cart_rule for every (cohort_id, product_id) pair.

    Returns:
        DataFrame with cohort_id, product_id and current_cart_rule.
    """
    print("Fetching current cart rules...")
    raw_rules = query_snowflake(CART_RULES_QUERY)
    per_cohort_product = raw_rules.groupby(['cohort_id', 'product_id'])
    rules = per_cohort_product.agg(
        current_cart_rule=('current_cart_rule', 'min')
    )
    rules = rules.reset_index()
    print(f"  Loaded {len(rules)} records")
    return rules


In [None]:
# =============================================================================
# UTH (UP-TILL-HOUR) PERFORMANCE QUERIES
# =============================================================================
# Reusable queries for Module 3 and Module 4

def get_uth_performance():
    """
    Fetch today's Up-Till-Hour (UTH) sales from Snowflake.

    Covers orders created today (Cairo date) in hours strictly before the
    current Cairo hour, for telesales/retailer channels, excluding order
    statuses 7 and 12 and lines with a zero purchased_item_count.

    Returns:
        DataFrame with: warehouse_id, product_id, uth_qty, uth_nmv, uth_retailers
    """
    uth_sql = f'''
    WITH params AS (
        SELECT
            CONVERT_TIMEZONE('{TIMEZONE}', 'Africa/Cairo', CURRENT_TIMESTAMP())::DATE AS today,
            HOUR(CONVERT_TIMEZONE('{TIMEZONE}', 'Africa/Cairo', CURRENT_TIMESTAMP())) AS current_hour
    ),
    sales_today AS (
        SELECT
            pso.warehouse_id,
            pso.product_id,
            SUM(pso.purchased_item_count) AS qty,
            SUM(pso.total_price) AS nmv,
            so.retailer_id
        FROM product_sales_order pso
        JOIN sales_orders so ON so.id = pso.sales_order_id
        CROSS JOIN params p
        WHERE so.created_at::date = p.today
            AND HOUR(so.created_at) < p.current_hour
            AND so.sales_order_status_id NOT IN (7, 12)
            AND so.channel IN ('telesales', 'retailer')
            AND pso.purchased_item_count <> 0
        GROUP BY pso.warehouse_id, pso.product_id, so.retailer_id
    )
    SELECT
        warehouse_id,
        product_id,
        SUM(qty) AS uth_qty,
        SUM(nmv) AS uth_nmv,
        COUNT(DISTINCT retailer_id) AS uth_retailers
    FROM sales_today
    GROUP BY warehouse_id, product_id
    '''
    print("Fetching UTH performance from Snowflake...")
    uth = query_snowflake(uth_sql)
    record_count = len(uth)
    print(f"  Loaded {record_count} UTH records")
    return uth


def get_hourly_distribution():
    """
    Fetch the historical hourly sales distribution per warehouse and category.

    Looks at the 120 days before today (Cairo date) on Snowflake and averages,
    per warehouse and category, the share of each day's quantity and retailers
    that fell before the current hour (UTH) and in the previous hour.

    Returns: warehouse_id, cat, avg_uth_pct_qty, avg_uth_pct_retailers,
    avg_last_hour_pct_qty, avg_last_hour_pct_retailers. The four percentage
    columns are cast to float.

    NOTE(review): last_hour is computed as current_hour - 1, which is -1
    during the first hour of the day, so the last-hour shares are 0 then -
    confirm that is acceptable.
    """
    distribution_sql = f'''
    WITH params AS (
        SELECT
            CONVERT_TIMEZONE('{TIMEZONE}', 'Africa/Cairo', CURRENT_TIMESTAMP())::DATE AS today,
            CONVERT_TIMEZONE('{TIMEZONE}', 'Africa/Cairo', CURRENT_TIMESTAMP())::DATE - 120 AS history_start,
            HOUR(CONVERT_TIMEZONE('{TIMEZONE}', 'Africa/Cairo', CURRENT_TIMESTAMP())) AS current_hour,
            HOUR(CONVERT_TIMEZONE('{TIMEZONE}', 'Africa/Cairo', CURRENT_TIMESTAMP())) - 1 AS last_hour
    ),
    hourly_sales AS (
        SELECT
            pso.warehouse_id,
            c.name_ar AS cat,
            so.created_at::date AS sale_date,
            HOUR(so.created_at) AS sale_hour,
            SUM(pso.purchased_item_count) AS qty,
            COUNT(DISTINCT so.retailer_id) AS retailers
        FROM product_sales_order pso
        JOIN sales_orders so ON so.id = pso.sales_order_id
        JOIN products p ON p.id = pso.product_id
        JOIN categories c ON c.id = p.category_id
        CROSS JOIN params
        WHERE so.created_at::date BETWEEN params.history_start AND params.today - 1
            AND so.sales_order_status_id NOT IN (7, 12)
            AND so.channel IN ('telesales', 'retailer')
            AND pso.purchased_item_count <> 0
        GROUP BY pso.warehouse_id, c.name_ar, so.created_at::date, sale_hour
    ),
    daily_totals AS (
        SELECT warehouse_id, cat, sale_date,
            SUM(qty) AS day_total_qty,
            SUM(retailers) AS day_total_retailers
        FROM hourly_sales
        GROUP BY warehouse_id, cat, sale_date
    ),
    uth_totals AS (
        SELECT hs.warehouse_id, hs.cat, hs.sale_date,
            SUM(hs.qty) AS uth_total_qty,
            SUM(hs.retailers) AS uth_total_retailers
        FROM hourly_sales hs
        CROSS JOIN params p
        WHERE hs.sale_hour < p.current_hour
        GROUP BY hs.warehouse_id, hs.cat, hs.sale_date
    ),
    last_hour_totals AS (
        SELECT hs.warehouse_id, hs.cat, hs.sale_date,
            SUM(hs.qty) AS last_hour_total_qty,
            SUM(hs.retailers) AS last_hour_total_retailers
        FROM hourly_sales hs
        CROSS JOIN params p
        WHERE hs.sale_hour = p.last_hour
        GROUP BY hs.warehouse_id, hs.cat, hs.sale_date
    )
    SELECT
        dt.warehouse_id, dt.cat,
        AVG(COALESCE(ut.uth_total_qty, 0) / NULLIF(dt.day_total_qty, 0)) AS avg_uth_pct_qty,
        AVG(COALESCE(ut.uth_total_retailers, 0) / NULLIF(dt.day_total_retailers, 0)) AS avg_uth_pct_retailers,
        AVG(COALESCE(lh.last_hour_total_qty, 0) / NULLIF(dt.day_total_qty, 0)) AS avg_last_hour_pct_qty,
        AVG(COALESCE(lh.last_hour_total_retailers, 0) / NULLIF(dt.day_total_retailers, 0)) AS avg_last_hour_pct_retailers
    FROM daily_totals dt
    LEFT JOIN uth_totals ut ON dt.warehouse_id = ut.warehouse_id 
        AND dt.cat = ut.cat AND dt.sale_date = ut.sale_date
    LEFT JOIN last_hour_totals lh ON dt.warehouse_id = lh.warehouse_id 
        AND dt.cat = lh.cat AND dt.sale_date = lh.sale_date
    WHERE dt.day_total_qty > 0
    GROUP BY dt.warehouse_id, dt.cat
    '''
    print("Fetching hourly distribution from Snowflake...")
    distribution = query_snowflake(distribution_sql)
    # The averages arrive as decimal.Decimal; downstream code expects floats.
    pct_columns = (
        'avg_uth_pct_qty',
        'avg_uth_pct_retailers',
        'avg_last_hour_pct_qty',
        'avg_last_hour_pct_retailers',
    )
    for name in pct_columns:
        if name not in distribution.columns:
            continue
        distribution[name] = distribution[name].astype(float)
    print(f"  Loaded {len(distribution)} hourly distribution records")
    return distribution


def get_last_hour_performance():
    """
    Get the previous full clock hour's performance from the DWH (PostgreSQL).

    The window is [start of previous hour, start of current hour) in
    Africa/Cairo wall-clock time. The earlier filter compared
    EXTRACT(HOUR ...) with "current hour - 1" on today's date, which became
    -1 between 00:00 and 00:59 and therefore always returned no rows; the
    range filter correctly reaches back to 23:00-23:59 of the previous day.
    For every other hour the selected rows are the same as before.

    Only telesales/retailer orders are counted, excluding order statuses 7 and
    12 and lines with a zero purchased_item_count.

    Returns:
        DataFrame with: warehouse_id, product_id, last_hour_qty, last_hour_nmv,
        last_hour_retailers (all converted to numeric, unparsable values
        become NaN).
    """
    columns = ['warehouse_id', 'product_id', 'last_hour_qty', 'last_hour_nmv', 'last_hour_retailers']
    # Plain string: the query has no placeholders, so no f-string is needed.
    # NOTE(review): like the original filter, this assumes so.created_at holds
    # Cairo local time - confirm against the DWH schema.
    LAST_HOUR_QUERY = '''
SELECT
    pso.warehouse_id,
    pso.product_id,
    SUM(pso.purchased_item_count) AS last_hour_qty,
    SUM(pso.total_price) AS last_hour_nmv,
    COUNT(DISTINCT so.retailer_id) AS last_hour_retailers
FROM product_sales_order pso
JOIN sales_orders so
    ON so.id = pso.sales_order_id
WHERE so.created_at >= DATE_TRUNC('hour', CURRENT_TIMESTAMP AT TIME ZONE 'Africa/Cairo') - INTERVAL '1 hour'
AND so.created_at < DATE_TRUNC('hour', CURRENT_TIMESTAMP AT TIME ZONE 'Africa/Cairo')
AND so.sales_order_status_id NOT IN (7, 12)
AND so.channel IN ('telesales', 'retailer')
AND pso.purchased_item_count <> 0
GROUP BY pso.warehouse_id, pso.product_id;
    '''
    print("Fetching last hour performance from DWH...")
    df = setup_environment_2.dwh_pg_query(LAST_HOUR_QUERY, columns=columns)
    df.columns = df.columns.str.lower()
    # Convert to numeric
    for col in columns:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors='coerce')
    print(f"  Loaded {len(df)} last hour records from DWH")
    return df

# Confirmation printed when this cell finishes running.
print("✅ UTH and Last Hour functions defined")


In [None]:
# =============================================================================
# READY
# =============================================================================
# Summary banner listing the loader functions; emitted with a single print call
# whose newline separator yields the same output as one print per line.
print(
    "\n" + "="*50,
    "QUERIES MODULE READY",
    "="*50,
    "\nLive Data Functions:",
    "  • get_current_stocks()",
    "  • get_packing_units()",
    "  • get_current_prices()",
    "  • get_current_wac()",
    "  • get_current_cart_rules()",
    "\nUTH Performance Functions:",
    "  • get_uth_performance()         - UTH qty/retailers (Snowflake)",
    "  • get_hourly_distribution()     - Historical hour contributions (Snowflake)",
    "  • get_last_hour_performance()   - Last hour qty/retailers (DWH)",
    "\nNote: Market prices use MODULE_1_INPUT data",
    sep="\n",
)


In [None]:
# =============================================================================
# RETAILER SELECTION QUERIES (for SKU Discount Handler)
# =============================================================================

def get_churned_dropped_retailers(selected_skus_tuple: str) -> pd.DataFrame:
    """
    Query 1: Get retailers who were buying this product but dropped >30%.
    These are churned/dropping retailers who might respond to a discount.

    Compares the average order NMV per retailer/product between 120 and 31
    days ago with the average over the last 30 days (0 when there were no
    orders). A retailer qualifies when the change is below -30%, their last
    order of the product is at least 5 days old (or there is none in the
    recent window), and they placed some order from one of the selected
    warehouses in the last 60 days.

    Args:
        selected_skus_tuple: String of tuples like "(1, 2), (3, 4)", each
            being (product_id, warehouse_id). It is interpolated verbatim
            into the SQL, so it must be built by trusted code.

    Returns:
        DataFrame with retailer_id, product_id, warehouse_id. An empty
        selection returns an empty DataFrame without querying Snowflake.
    """
    # An empty selection would render "VALUES" with no rows, which is invalid SQL.
    if not selected_skus_tuple or not selected_skus_tuple.strip():
        print("  No SKUs selected - skipping churned/dropped retailers query")
        return pd.DataFrame(columns=['retailer_id', 'product_id', 'warehouse_id'])

    # made_order is joined on sb.retailer_id. Joining it on sa.retailer_id (as
    # before) could never match a retailer without a sales_after row, so fully
    # churned retailers were always dropped and the "last_order IS NULL"
    # branch below was unreachable.
    # NOTE(review): sales_after is matched on retailer and product only, not
    # warehouse_id - confirm that is intended.
    query = f'''
    WITH selected_prods AS (
        SELECT * 
        FROM (VALUES {selected_skus_tuple}) x(product_id, warehouse_id)
    ),
    sales_before AS (
        SELECT retailer_id, product_id, warehouse_id, avg(nmv) as avg_nmv_before
        FROM (
            SELECT DISTINCT
                so.id as order_id,
                sp.warehouse_id as warehouse_id,
                pso.product_id as product_id,
                so.retailer_id as retailer_id,
                sum(pso.total_price) as nmv 
            FROM product_sales_order pso
            JOIN sales_orders so ON so.id = pso.sales_order_id
            JOIN materialized_views.retailer_polygon ON materialized_views.retailer_polygon.retailer_id = so.retailer_id
            JOIN districts ON districts.id = materialized_views.retailer_polygon.district_id
            JOIN selected_prods sp ON sp.product_id = pso.product_id AND sp.warehouse_id = pso.warehouse_id 
            WHERE so.created_at::date BETWEEN CONVERT_TIMEZONE('{TIMEZONE}', 'Africa/Cairo', CURRENT_TIMESTAMP())::DATE - 120 
                  AND CONVERT_TIMEZONE('{TIMEZONE}', 'Africa/Cairo', CURRENT_TIMESTAMP())::DATE - 31
                AND so.sales_order_status_id NOT IN (7, 12)
                AND so.channel IN ('telesales', 'retailer')
                AND pso.purchased_item_count <> 0
            GROUP BY ALL
        )
        GROUP BY ALL 
    ),
    sales_after AS (
        SELECT retailer_id, product_id, warehouse_id, avg(nmv) as avg_nmv_after, max(order_date) as last_order
        FROM (
            SELECT DISTINCT
                so.id as order_id,
                so.created_at::date as order_date,
                sales_order_status_id, 
                sp.warehouse_id as warehouse_id,
                pso.product_id as product_id,
                so.retailer_id as retailer_id,
                sum(pso.total_price) as nmv 
            FROM product_sales_order pso
            JOIN sales_orders so ON so.id = pso.sales_order_id
            JOIN materialized_views.retailer_polygon ON materialized_views.retailer_polygon.retailer_id = so.retailer_id
            JOIN districts ON districts.id = materialized_views.retailer_polygon.district_id
            JOIN selected_prods sp ON sp.product_id = pso.product_id AND sp.warehouse_id = pso.warehouse_id 
            WHERE so.created_at::date > CONVERT_TIMEZONE('{TIMEZONE}', 'Africa/Cairo', CURRENT_TIMESTAMP())::DATE - 31
                AND so.sales_order_status_id NOT IN (7, 12)
                AND so.channel IN ('telesales', 'retailer')
                AND pso.purchased_item_count <> 0
            GROUP BY ALL
        )
        GROUP BY ALL 
    ),
    made_order AS (
        SELECT DISTINCT so.retailer_id
        FROM sales_orders so 
        JOIN product_sales_order pso ON pso.sales_order_id = so.id 
        JOIN selected_prods sp ON sp.warehouse_id = pso.warehouse_id
        WHERE so.created_at::date >= CONVERT_TIMEZONE('{TIMEZONE}', 'Africa/Cairo', CURRENT_TIMESTAMP())::DATE - 60
            AND so.sales_order_status_id NOT IN (7, 12)
            AND so.channel IN ('telesales', 'retailer')
        GROUP BY ALL
    )
    SELECT DISTINCT retailer_id, product_id, warehouse_id
    FROM (
        SELECT sb.*, COALESCE(avg_nmv_after, 0) as nmv_after, 
               (nmv_after - avg_nmv_before) / avg_nmv_before as growth
        FROM sales_before sb 
        LEFT JOIN sales_after sa ON sb.retailer_id = sa.retailer_id AND sb.product_id = sa.product_id 
        LEFT JOIN made_order mo ON mo.retailer_id = sb.retailer_id 
        WHERE growth < -0.3
            AND (CONVERT_TIMEZONE('{TIMEZONE}', 'Africa/Cairo', CURRENT_TIMESTAMP())::DATE - last_order >= 5 OR last_order IS NULL)
            AND mo.retailer_id IS NOT NULL 
    )
    '''
    print("  Fetching churned/dropped retailers...")
    df = query_snowflake(query)
    print(f"    Found {len(df)} churned/dropped retailer-product combinations")
    return df


def get_category_not_product_retailers(selected_skus_tuple: str) -> pd.DataFrame:
    """
    Query 2: Get retailers who buy the category but not this specific product.
    These are potential new customers for the product.

    A retailer is returned for a selected product when, in the last 60 days
    and from the selected warehouse, they bought something in the product's
    category but never the selected product itself (telesales/retailer
    channels, order statuses 7 and 12 excluded).

    Args:
        selected_skus_tuple: String of tuples like "(1, 2), (3, 4)", each
            being (product_id, warehouse_id). It is interpolated verbatim
            into the SQL, so it must be built by trusted code.

    Returns:
        DataFrame with retailer_id, product_id, warehouse_id. An empty
        selection returns an empty DataFrame without querying Snowflake.
    """
    # An empty selection would render "VALUES" with no rows, which is invalid SQL.
    if not selected_skus_tuple or not selected_skus_tuple.strip():
        print("  No SKUs selected - skipping category-not-product retailers query")
        return pd.DataFrame(columns=['retailer_id', 'product_id', 'warehouse_id'])

    # The max(flag) level groups by retailer / category / selected product only.
    # It used to group by the purchased brand as well, so a retailer who bought
    # the selected product AND another brand of the same category still had a
    # flag = 0 row (the other brand's) and was wrongly returned.
    query = f'''
    WITH selected_prods AS (
        SELECT * 
        FROM (VALUES {selected_skus_tuple}) x(product_id, warehouse_id)
    ),
    selected_prods_with_cat AS (
        SELECT DISTINCT sp.warehouse_id, sp.product_id, c.name_ar as cat, b.name_ar as brand
        FROM selected_prods sp
        JOIN products p ON p.id = sp.product_id
        JOIN brands b ON b.id = p.brand_id 
        JOIN categories c ON c.id = p.category_id 
    ),
    selected_dis_cat_brand AS (
        SELECT DISTINCT warehouse_id, cat
        FROM selected_prods_with_cat
    ),
    buy_cat AS (
        SELECT DISTINCT
            sd.warehouse_id as warehouse_id,
            so.retailer_id as retailer_id,
            c.name_ar as cat,
            b.name_ar as brand,
            pso.product_id
        FROM product_sales_order pso
        JOIN sales_orders so ON so.id = pso.sales_order_id
        JOIN products p ON p.id = pso.product_id
        JOIN brands b ON b.id = p.brand_id 
        JOIN categories c ON c.id = p.category_id 
        JOIN selected_dis_cat_brand sd ON sd.cat = c.name_ar AND sd.warehouse_id = pso.warehouse_id
        WHERE so.created_at::date >= CONVERT_TIMEZONE('{TIMEZONE}', 'Africa/Cairo', CURRENT_TIMESTAMP())::DATE - 60
            AND so.sales_order_status_id NOT IN (7, 12)
            AND so.channel IN ('telesales', 'retailer')
            AND pso.purchased_item_count <> 0
    ),
    chosen_products AS (
        SELECT sp.*, c.name_ar as cat, b.name_ar as brand
        FROM selected_prods sp 
        JOIN products p ON p.id = sp.product_id
        JOIN brands b ON b.id = p.brand_id 
        JOIN categories c ON c.id = p.category_id 
    )
    SELECT DISTINCT retailer_id, selected_product_id as product_id, warehouse_id
    FROM (
        SELECT warehouse_id, retailer_id, cat, selected_product_id, max(flag) as flag
        FROM (
            SELECT bc.*, cp.product_id as selected_product_id,
                CASE WHEN cp.product_id = bc.product_id THEN 1 ELSE 0 END as flag 
            FROM buy_cat bc 
            LEFT JOIN chosen_products cp ON cp.warehouse_id = bc.warehouse_id AND cp.cat = bc.cat 
        )
        GROUP BY ALL 
    )
    WHERE flag = 0 
    '''
    print("  Fetching category-not-product retailers...")
    df = query_snowflake(query)
    print(f"    Found {len(df)} category-not-product retailer-product combinations")
    return df


def get_out_of_cycle_retailers(selected_skus_tuple: str) -> pd.DataFrame:
    """
    Query 3: Get retailers who should have reordered by now based on their purchase cycle.

    Uses orders since the start of the month one year ago. For retailers with
    at least 4 orders of the product and a last order within 60 days, the
    expected next order date is
    last_o_date + floor(avg_cycle + 2.5 * std), where avg_cycle is a
    recency-weighted average of the days between consecutive orders and std
    is their standard deviation. Retailers whose expected date is today or
    earlier are returned.

    Args:
        selected_skus_tuple: String of tuples like "(1, 2), (3, 4)", each
            being (product_id, warehouse_id). It is interpolated verbatim
            into the SQL, so it must be built by trusted code.

    Returns:
        DataFrame with retailer_id, product_id, warehouse_id. An empty
        selection returns an empty DataFrame without querying Snowflake.
    """
    # An empty selection would render "VALUES" with no rows, which is invalid SQL.
    if not selected_skus_tuple or not selected_skus_tuple.strip():
        print("  No SKUs selected - skipping out-of-cycle retailers query")
        return pd.DataFrame(columns=['retailer_id', 'product_id', 'warehouse_id'])

    # NOTE(review): all_w sums the weights of every order, including the
    # earliest one that is later removed by "prev_order IS NOT NULL", so the
    # weights used for avg_cycle add up to less than 1 - confirm intended.
    query = f'''
    WITH selected_prods AS (
        SELECT * 
        FROM (VALUES {selected_skus_tuple}) x(product_id, warehouse_id)
    )
    SELECT retailer_id, product_id, warehouse_id
    FROM (
        SELECT *, last_o_date + floor(avg_cycle + (2.5 * std))::int as next_order
        FROM (
            SELECT retailer_id, product_id, warehouse_id, max(last_o_date) as last_o_date, 
                sum(order_days * (w / all_w)) as avg_cycle, stddev(order_days) as std
            FROM (
                SELECT *,
                    max(order_num) OVER(PARTITION BY retailer_id, product_id) as max_orders,
                    lag(o_date) OVER(PARTITION BY product_id, retailer_id ORDER BY o_date) as prev_order,
                    o_date - prev_order as order_days,
                    CASE WHEN CONVERT_TIMEZONE('{TIMEZONE}', 'Africa/Cairo', CURRENT_TIMESTAMP())::DATE - o_date = 0 
                         THEN 1 ELSE 1 / (CONVERT_TIMEZONE('{TIMEZONE}', 'Africa/Cairo', CURRENT_TIMESTAMP())::DATE - o_date) END as w,
                    sum(w) OVER(PARTITION BY product_id, retailer_id) as all_w
                FROM (
                    SELECT DISTINCT
                        so.id as order_id,
                        so.created_at::date as o_date,
                        sp.warehouse_id as warehouse_id,
                        pso.product_id as product_id,
                        so.retailer_id as retailer_id,
                        sum(pso.total_price) as nmv,
                        row_number() OVER(PARTITION BY so.retailer_id, pso.product_id ORDER BY o_date DESC) as order_num,
                        max(o_date) OVER(PARTITION BY so.retailer_id, pso.product_id) as last_o_date
                    FROM product_sales_order pso
                    JOIN sales_orders so ON so.id = pso.sales_order_id
                    JOIN selected_prods sp ON sp.product_id = pso.product_id AND sp.warehouse_id = pso.warehouse_id 
                    WHERE so.created_at::date >= DATE_TRUNC('month', CONVERT_TIMEZONE('{TIMEZONE}', 'Africa/Cairo', CURRENT_TIMESTAMP())::DATE - INTERVAL '1 year')
                        AND so.sales_order_status_id NOT IN (7, 12)
                        AND so.channel IN ('telesales', 'retailer')
                        AND pso.purchased_item_count <> 0
                    GROUP BY 1, 2, 3, 4, 5
                )
                WHERE last_o_date >= CONVERT_TIMEZONE('{TIMEZONE}', 'Africa/Cairo', CURRENT_TIMESTAMP())::DATE - 60
                QUALIFY max_orders >= 4
            )
            WHERE prev_order IS NOT NULL 
            GROUP BY ALL
        )
        WHERE CONVERT_TIMEZONE('{TIMEZONE}', 'Africa/Cairo', CURRENT_TIMESTAMP())::DATE >= next_order
    )
    '''
    print("  Fetching out-of-cycle retailers...")
    df = query_snowflake(query)
    print(f"    Found {len(df)} out-of-cycle retailer-product combinations")
    return df


def get_view_no_orders_retailers(selected_skus_tuple: str) -> pd.DataFrame:
    """
    Query 4: Get retailers who viewed the brand but didn't order.

    Takes each retailer's most recent brand-view event from 10 to 2 days ago
    and keeps it when the retailer neither added a product of that brand to
    the cart at or after the view, nor ordered that brand/category on or
    after the view date. The result is then matched to the selected products
    by brand and category, and limited to retailers whose district is covered
    by a dispatching polygon of the selected product/warehouse.

    Args:
        selected_skus_tuple: String of tuples like "(1, 2), (3, 4)", each
            being (product_id, warehouse_id). It is interpolated verbatim
            into the SQL, so it must be built by trusted code.

    Returns:
        DataFrame with retailer_id, product_id, warehouse_id. An empty
        selection returns an empty DataFrame without querying Snowflake.
    """
    # An empty selection would render "VALUES" with no rows, which is invalid SQL.
    if not selected_skus_tuple or not selected_skus_tuple.strip():
        print("  No SKUs selected - skipping view-no-orders retailers query")
        return pd.DataFrame(columns=['retailer_id', 'product_id', 'warehouse_id'])

    query = f'''
    WITH selected_prods AS (
        SELECT * 
        FROM (VALUES {selected_skus_tuple}) x(product_id, warehouse_id)
    ),
    selected_prods_with_brand_cat AS (
        SELECT DISTINCT sp.warehouse_id, c.id as cat_id, b.id as brand_id, sp.product_id
        FROM selected_prods sp
        JOIN products p ON p.id = sp.product_id 
        JOIN brands b ON b.id = p.brand_id 
        JOIN categories c ON c.id = p.category_id
    ),
    brand_open AS (
        SELECT        
            event_date,
            event_timestamp,
            vb.retailer_id,
            vb.brand_id,
            vb.brand_name,
            vb.category_id,
            c.name_ar as cat_name
        FROM maxab_events.view_brand vb
        JOIN categories c ON c.id = vb.category_id
        WHERE event_timestamp::date BETWEEN CONVERT_TIMEZONE('{TIMEZONE}', 'Africa/Cairo', CURRENT_TIMESTAMP())::DATE - 10 
              AND CONVERT_TIMEZONE('{TIMEZONE}', 'Africa/Cairo', CURRENT_TIMESTAMP())::DATE - 2
            AND country LIKE '%Egypt%'
            AND user_id LIKE '%EG_retailers_%'
            AND brand_id <> 'null'
    ),
    add_to_cart AS (
        SELECT 
            event_date,
            event_timestamp,
            uc.retailer_id,
            productsid AS product_id,
            b.id as brand_id
        FROM maxab_events.update_cart uc
        JOIN products p ON p.id = uc.productsid 
        JOIN brands b ON b.id = p.brand_id 
        WHERE event_timestamp::date BETWEEN CONVERT_TIMEZONE('{TIMEZONE}', 'Africa/Cairo', CURRENT_TIMESTAMP())::DATE - 10 
              AND CONVERT_TIMEZONE('{TIMEZONE}', 'Africa/Cairo', CURRENT_TIMESTAMP())::DATE - 2
            AND country LIKE '%Egypt%'
            AND update_type = 'add'
            AND user_id LIKE '%EG_retailers_%'
            AND productsid REGEXP '^[0-9]+$'
    ),
    in_stock_retailers AS (
        SELECT DISTINCT retailer_id 
        FROM sales_orders 
        WHERE sales_order_status_id = 6 
            AND channel IN ('retailer', 'telesales')
            AND created_at::date >= DATE_TRUNC('month', CONVERT_TIMEZONE('{TIMEZONE}', 'Africa/Cairo', CURRENT_TIMESTAMP())::DATE - INTERVAL '6 months')
    ),
    sales_data AS (
        SELECT so.retailer_id, b.name_ar as brand, c.name_ar as cat, max(so.created_at::date) as o_date
        FROM sales_orders so
        JOIN PRODUCT_SALES_ORDER pso ON pso.sales_order_id = so.id 
        JOIN products p ON p.id = pso.product_id
        JOIN brands b ON b.id = p.brand_id 
        JOIN categories c ON c.id = p.category_id
        WHERE so.created_at::date >= CONVERT_TIMEZONE('{TIMEZONE}', 'Africa/Cairo', CURRENT_TIMESTAMP())::DATE - 10  
            AND sales_order_status_id NOT IN (7, 12)
        GROUP BY ALL
    ),
    cat_brand AS (
        SELECT DISTINCT c.id as cat, b.id as brand 
        FROM sales_orders so
        JOIN PRODUCT_SALES_ORDER pso ON pso.sales_order_id = so.id 
        JOIN products p ON p.id = pso.product_id
        JOIN brands b ON b.id = p.brand_id 
        JOIN categories c ON c.id = p.category_id
        WHERE so.created_at::date >= CONVERT_TIMEZONE('{TIMEZONE}', 'Africa/Cairo', CURRENT_TIMESTAMP())::DATE - 120 
            AND sales_order_status_id NOT IN (7, 12)
    ),
    main_cte AS (
        SELECT * 
        FROM (
            SELECT x.*, CASE WHEN sd.retailer_id IS NOT NULL THEN 1 ELSE 0 END as ordered 
            FROM (
                SELECT *, max(event_date) OVER(PARTITION BY retailer_id, brand_id, category_id) as last_event
                FROM (
                    SELECT event_date, retailer_id, brand_id, brand_name, category_id,
                        cat_name, sum(count_n) as total_count
                    FROM (
                        SELECT bo.*, count(DISTINCT atc.product_id) as count_n
                        FROM brand_open bo 
                        JOIN cat_brand cb ON bo.category_id = cb.cat AND bo.brand_id = cb.brand
                        JOIN in_stock_retailers isr ON isr.retailer_id = bo.retailer_id 
                        LEFT JOIN add_to_cart atc ON bo.retailer_id = atc.retailer_id AND bo.brand_id = atc.brand_id AND atc.event_timestamp >= bo.event_timestamp
                        GROUP BY ALL 
                    )
                    GROUP BY ALL 
                )
                QUALIFY event_date = last_event
            ) x 
            LEFT JOIN sales_data sd ON sd.retailer_id = x.retailer_id AND x.cat_name = sd.cat AND x.brand_name = sd.brand AND x.event_date <= sd.o_date
        )
        WHERE ordered = 0 AND total_count = 0 
    )
    SELECT DISTINCT m.retailer_id, sp.product_id, sp.warehouse_id
    FROM main_cte m 
    JOIN selected_prods_with_brand_cat sp ON sp.brand_id = m.brand_id AND sp.cat_id = m.category_id 
    JOIN materialized_views.retailer_polygon rp ON rp.retailer_id = m.retailer_id 
    JOIN WAREHOUSE_DISPATCHING_RULES wdr ON wdr.product_id = sp.product_id AND wdr.warehouse_id = sp.warehouse_id
    JOIN DISPATCHING_POLYGONS dp ON dp.id = wdr.DISPATCHING_POLYGON_ID AND dp.district_id = rp.district_id
    '''
    print("  Fetching view-no-orders retailers...")
    df = query_snowflake(query)
    print(f"    Found {len(df)} view-no-orders retailer-product combinations")
    return df


def get_excluded_retailers() -> pd.DataFrame:
    """
    Fetch the retailers that must not receive SKU discounts.

    The query is a UNION ALL of three sources:
    - Retailers whose latest order date within the last 120 days (Cairo time,
      channels 'telesales' / 'retailer', statuses 7 and 12 ignored) has an
      order whose status is not 6, 9 or 12
    - Retailers whose activation is 'false'
    - Retailers attached to a dynamic tag with id > 3000 whose name matches
      '%whole_sale%'

    NOTE(review): the status codes are not defined in this file; presumably
    the first branch captures retailers whose last order failed - confirm.
    NOTE(review): an earlier version of this docstring also listed "retailers
    already having active SKU discounts", but no branch of the query below
    filters on SKU discounts - confirm whether that is handled elsewhere.

    Returns:
        DataFrame with a retailer_id column, duplicates removed
    """
    sql = f'''
    SELECT retailer_id
    FROM (
        SELECT DISTINCT
            retailer_id,
            sales_order_status_id,
            created_at::date as o_date,
            max(o_date) OVER(PARTITION BY retailer_id) as last_order
        FROM sales_orders so 
        WHERE so.created_at::date >= CONVERT_TIMEZONE('{TIMEZONE}', 'Africa/Cairo', CURRENT_TIMESTAMP())::DATE - 120
            AND so.sales_order_status_id NOT IN (7, 12)
            AND so.channel IN ('telesales', 'retailer')
        QUALIFY o_date = last_order
    )
    WHERE sales_order_status_id NOT IN (6, 9, 12)
    
    UNION ALL 
    
    SELECT id as retailer_id 
    FROM retailers 
    WHERE activation = 'false'
    
    UNION ALL 
    
    SELECT DISTINCT dta.TAGGABLE_ID as retailer_id
    FROM DYNAMIC_TAGS dt 
    JOIN dynamic_taggables dta ON dt.id = dta.dynamic_tag_id 
    WHERE name LIKE '%whole_sale%'
        AND dt.id > 3000
    '''
    print("  Fetching excluded retailers...")
    # The branches are combined with UNION ALL, so a retailer matching several
    # rules appears more than once; collapse those repeats client-side.
    excluded = query_snowflake(sql).drop_duplicates()
    print(f"    Found {len(excluded)} retailers to exclude")
    return excluded


def get_retailers_with_quantity_discount() -> pd.DataFrame:
    """
    Get retailer-product combinations that already have quantity discounts.
    These should be excluded from SKU discounts.

    A quantity discount is picked up when it is flagged active AND it is
    either running right now (Cairo time between start_at and end_at) or
    starts later today. The discount's dynamic tag is then expanded to the
    retailers attached to that tag.

    Returns:
        DataFrame with retailer_id, product_id columns (duplicates removed);
        an empty DataFrame with those columns when nothing qualifies
    """
    # First get active quantity discount tags.
    # The time-window alternatives are wrapped in one parenthesised group:
    # AND binds tighter than OR, so without the group `qd.active = TRUE`
    # would only constrain the "starts later today" branch and inactive
    # discounts that are currently in their window would slip through.
    query_tags = f'''
    SELECT DISTINCT
        qdv.product_id,
        qd.dynamic_tag_id AS tag_id
    FROM quantity_discounts qd
    JOIN quantity_discount_values qdv ON qd.id = qdv.quantity_discount_id
    WHERE qd.active = TRUE
        AND (
            (CONVERT_TIMEZONE('{TIMEZONE}', 'Africa/Cairo', CURRENT_TIMESTAMP()) BETWEEN qd.start_at AND qd.end_at)
            OR ((qd.start_at::date = CONVERT_TIMEZONE('{TIMEZONE}', 'Africa/Cairo', CURRENT_TIMESTAMP())::DATE)
                AND (CONVERT_TIMEZONE('{TIMEZONE}', 'Africa/Cairo', CURRENT_TIMESTAMP()) < qd.start_at))
        )
    '''

    print("  Fetching retailers with quantity discounts...")
    df_tags = query_snowflake(query_tags)

    # A discount row without a dynamic tag cannot be mapped to any retailer,
    # and int() on a missing value would raise while building the VALUES list.
    df_tags = df_tags.dropna(subset=['tag_id'])

    if df_tags.empty:
        print("    No active quantity discounts found")
        return pd.DataFrame(columns=['retailer_id', 'product_id'])

    # Get retailers for each tag. int() guarantees only numeric literals are
    # interpolated into the VALUES clause.
    tag_list = ','.join(f"({int(t)})" for t in df_tags['tag_id'].unique())

    query_retailers = f'''
    WITH tags AS (
        SELECT * FROM (VALUES {tag_list}) x(dynamic_tag_id)
    )
    SELECT tags.dynamic_tag_id as tag_id, taggable_id as retailer_id
    FROM dynamic_taggables dt  
    JOIN tags ON tags.dynamic_tag_id = dt.dynamic_tag_id
    '''
    df_retailers = query_snowflake(query_retailers)

    # Merge to get retailer-product combinations (tag -> products x tag -> retailers)
    df_qd = df_tags.merge(df_retailers, on='tag_id')
    df_qd = df_qd[['retailer_id', 'product_id']].drop_duplicates()

    print(f"    Found {len(df_qd)} retailer-product combinations with quantity discounts")
    return df_qd


def get_retailer_main_warehouse() -> pd.DataFrame:
    """
    Fetch the warehouse(s) each retailer was served from on its latest order date.

    Only order lines from the last 365 days (Cairo time) are considered, on
    channels 'telesales' / 'retailer', with order statuses 7 and 12 ignored,
    a non-zero purchased_item_count, and a warehouse in the fixed list inside
    the query. The inner joins to products, brands, categories, product_units
    and finance.all_cogs further restrict rows to lines that have a match in
    each of those tables.

    A retailer can appear on several rows when its latest order date involved
    more than one warehouse.

    Returns:
        DataFrame with retailer_id, warehouse_id, last_wh columns
        (last_wh is the constant 1)
    """
    sql = f'''
    SELECT retailer_id, warehouse_id, 1 as last_wh 
    FROM (
        SELECT DISTINCT
            so.retailer_id,
            pso.warehouse_id,
            so.created_at::date as o_date,
            max(so.created_at::date) OVER(PARTITION BY so.retailer_id) as max_date
        FROM product_sales_order pso
        JOIN sales_orders so ON so.id = pso.sales_order_id
        JOIN products ON products.id = pso.product_id
        JOIN brands ON products.brand_id = brands.id 
        JOIN categories ON products.category_id = categories.id
        JOIN finance.all_cogs f ON f.product_id = pso.product_id
            AND f.from_date::date <= so.created_at::date
            AND f.to_date::date > so.created_at::date
        JOIN product_units ON product_units.id = products.unit_id  
        WHERE so.created_at::date >= CONVERT_TIMEZONE('{TIMEZONE}', 'Africa/Cairo', CURRENT_TIMESTAMP())::DATE - 365
            AND so.sales_order_status_id NOT IN (7, 12)
            AND so.channel IN ('telesales', 'retailer')
            AND pso.purchased_item_count <> 0
            AND pso.warehouse_id IN (1, 8, 170, 236, 337, 339, 401, 501, 632, 703, 797, 962)
        GROUP BY 1, 2, 3
        QUALIFY o_date = max_date
    )
    '''
    print("  Fetching retailer main warehouses...")
    mappings = query_snowflake(sql)
    print(f"    Found {len(mappings)} retailer-warehouse mappings")
    return mappings


# Cell summary: announce the retailer-selection helpers defined by this cell.
print("Retailer Selection Queries defined ✓")
# One joined print produces the same output as one print per helper, and the
# generator keeps its loop variable out of the notebook's global namespace.
print("\n".join(
    f"  - {helper}()"
    for helper in (
        "get_churned_dropped_retailers",
        "get_category_not_product_retailers",
        "get_out_of_cycle_retailers",
        "get_view_no_orders_retailers",
        "get_excluded_retailers",
        "get_retailers_with_quantity_discount",
        "get_retailer_main_warehouse",
    )
))
