In [1]:
%%capture

# Upgrade pip
!pip install --upgrade pip
# Connectivity
!pip install psycopg2-binary  # PostgreSQL adapter
# !pip install snowflake-connector-python  # Snowflake connector
!pip install snowflake-connector-python==3.15.0 # Snowflake connector Older Version
!pip install snowflake-sqlalchemy  # Snowflake SQLAlchemy connector
!pip install warnings # Warnings management
# !pip install pyarrow # Serialization
!pip install keyring==23.11.0 # Key management
!pip install sqlalchemy==1.4.46 # SQLAlchemy
!pip install requests # HTTP requests
!pip install boto3 # AWS SDK
# !pip install slackclient # Slack API
!pip install oauth2client # Google Sheets API
!pip install gspread==5.9.0 # Google Sheets API
!pip install gspread_dataframe # Google Sheets API
!pip install google.cloud # Google Cloud
# Data manipulation and analysis
!pip install polars
!pip install pandas==2.2.1
!pip install numpy
# !pip install fastparquet
!pip install openpyxl # Excel file handling
!pip install xlsxwriter # Excel file handling
# Linear programming
!pip install pulp
# Date and time handling
!pip install --upgrade datetime
!pip install python-time
!pip install --upgrade pytz
# Progress bar
!pip install tqdm
# Database data types
!pip install db-dtypes
# Geospatial data handling
# !pip install geopandas
# !pip install shapely
# !pip install fiona
# !pip install haversine
# Plotting

# Modeling
!pip install statsmodels
!pip install scikit-learn

!pip install import-ipynb

In [2]:
# Standard library
import calendar
import importlib
import json
import warnings
from datetime import date, datetime, timedelta

# Third-party
import numpy as np
import pandas as pd
from oauth2client.service_account import ServiceAccountCredentials
from tqdm import tqdm

# import_ipynb registers an import hook for .ipynb files, so it is imported before
# the local module it may have to load.
# NOTE(review): presumably setup_environment_2 lives in a notebook — confirm; if it is a
# plain .py module this ordering is simply harmless.
import import_ipynb
import setup_environment_2

# Silence all warnings for the rest of the notebook.
warnings.filterwarnings("ignore")

# Reload so that edits to setup_environment_2 are picked up without restarting the kernel,
# then run its environment initialisation.
importlib.reload(setup_environment_2)
setup_environment_2.initialize_env()

  from pandas.core.computation.check import NUMEXPR_INSTALLED


/home/ec2-user/.Renviron
/home/ec2-user/service_account_key.json


In [3]:
def query_snowflake(query, columns=None):
    """Run a SQL statement on Snowflake and return the rows as a DataFrame.

    Connection settings are read from the SNOWFLAKE_USERNAME, SNOWFLAKE_ACCOUNT,
    SNOWFLAKE_PASSWORD and SNOWFLAKE_DATABASE environment variables; the statement
    runs on the COMPUTE_WH warehouse.

    Parameters
    ----------
    query : str
        SQL text to execute.
    columns : sequence of str, optional
        Column labels for the result. When omitted or empty the frame keeps the
        default integer labels.

    Returns
    -------
    pandas.DataFrame or None
        The fetched rows. Rows go through ``np.array`` first, so mixed-type rows
        come back as strings/objects and callers convert numeric columns
        themselves. An empty result set with ``columns`` given yields an empty
        frame carrying those columns. ``None`` is returned when execution fails;
        the error is printed rather than raised.

    Raises
    ------
    KeyError
        If one of the required environment variables is missing.
    """
    import os
    import snowflake.connector
    import numpy as np
    import pandas as pd

    # Avoid a shared mutable default; an explicit empty list behaves as before.
    columns = [] if columns is None else list(columns)

    con = snowflake.connector.connect(
        user=os.environ["SNOWFLAKE_USERNAME"],
        account=os.environ["SNOWFLAKE_ACCOUNT"],
        password=os.environ["SNOWFLAKE_PASSWORD"],
        database=os.environ["SNOWFLAKE_DATABASE"],
    )
    # Bound before the try block so the finally clause never hits an unbound name
    # when cursor creation itself fails.
    cur = None
    try:
        cur = con.cursor()
        cur.execute("USE WAREHOUSE COMPUTE_WH")
        cur.execute(query)
        rows = cur.fetchall()
        if len(columns) == 0:
            return pd.DataFrame(np.array(rows))
        if not rows:
            # np.array([]) is 1-D and cannot be labelled with several columns.
            return pd.DataFrame(columns=columns)
        return pd.DataFrame(np.array(rows), columns=columns)
    except Exception as e:
        # Best-effort contract kept from the original: report and return None.
        print("Error: ", e)
        return None
    finally:
        if cur is not None:
            cur.close()
        con.close()

In [None]:
# Snowflake SQL that produces the quantity-discount candidates (one row per
# region / product / packing unit). CTEs in order of appearance:
#   stocks             - summed available basic-unit stock per region/product for the listed
#                        warehouses, keeping only positive totals
#   selected_skus      - in-stock SKUs per region/category whose cumulative share of distinct
#                        orders stays within the first value above 30%, at most 5 per group
#   main               - per order/SKU/packing unit quantity, NMV and COGS for the selected SKUs,
#                        from the start of the month two months back through yesterday
#   region_data        - quantity quartiles, median, 95th percentile and population std-dev
#   recent_region_data - median / q3 / std-dev restricted to the last 8 days
#   freq_table, lag_lead, most_freq, most_qty
#                      - local peaks of the order-quantity frequency distribution, blended
#                        into final_qty by frequency share
#   final_data         - tier_1 / tier_2 quantity thresholds (GREATEST of several candidates)
#   prices             - average customized cohort price per region/product/packing unit
#   elast              - elasticity per region/SKU: stock-filtered daily sales matched to cohort
#                        price periods, keeping only moves where price and quantity go in
#                        opposite directions, weighted by quantity share
#   ops                - operations cost (lm + wh + mm) as a share of NMV per region/SKU
# The final SELECT rounds the discounts to 0.0005 steps; discount_1_final is clamped to
# [0.0035, 0.015] and discount_2_final to at least max(0.005, discount_1_final + 0.005),
# capped at 0.02.
#
# NOTE(review): in `prices`, cohort 695 is listed under both 'Cairo' and 'Giza'; the first
#   WHEN wins, so 695 never maps to 'Giza' — confirm the intended mapping.
# NOTE(review): in `selected_skus`, row_number() orders by cntrb ascending, so the 5 kept
#   SKUs are the smallest contributors inside the cut — confirm this is intended.
command_string = '''
with stocks as (
WITH whs as (SELECT *
             FROM   (values
                            ('Cairo', 'El-Marg', 38),
                            ('Cairo', 'Mostorod', 1),
                            ('Giza', 'Barageel', 236),
                            ('Giza', 'Basatin', 39),
                            ('Delta West', 'El-Mahala', 337),
                            ('Delta West', 'Tanta', 8),
                            ('Delta East', 'Mansoura FC', 339),
                            ('Delta East', 'Sharqya', 170),
                            ('Upper Egypt', 'Assiut FC', 501),
                            ('Upper Egypt', 'Bani sweif', 401),
                            ('Upper Egypt', 'Menya Samalot', 703),
                            ('Upper Egypt', 'Sohag', 632),
                            ('Alexandria', 'Khorshed Alex', 797))
                    x(region, wh, warehouse_id))

SELECT  region,
        product_id,
        SUM(stocks) as stocks
FROM    (SELECT DISTINCT whs.region,
                whs.wh,
                product_warehouse.product_id,
                (product_warehouse.available_stock)::integer as stocks
        from whs
        JOIN product_warehouse ON product_warehouse.warehouse_id = whs.warehouse_id
        JOIN products on product_warehouse.product_id = products.id
        JOIN product_units ON products.unit_id = product_units.id

        where   product_warehouse.warehouse_id not in (6,9,10)
            AND product_warehouse.is_basic_unit = 1
        group by 1,2,3,4)
GROUP BY 1,2
having sum(stocks) > 0

),
selected_skus as (
select region,product_id,cat,brand,row_number()over(partition by region,cat order by cntrb) as num_skus
from (
select *,min(case when cumulative_sum > 0.3 then cumulative_sum end) over(partition by cat, region) as thres
from (
select *,SUM(cntrb) OVER (partition by cat, region ORDER BY cntrb desc ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS cumulative_sum
from (
select *, num_order/sum(num_order)over(partition by cat,region) as cntrb
from (
SELECT  DISTINCT
		case when regions.id = 2 then cities.name_en else regions.name_en end as region,
		pso.product_id,
		CONCAT(products.name_ar,' ',products.size,' ',product_units.name_ar) as sku,
		brands.name_ar as brand,
		categories.name_ar as cat,
		count(distinct parent_sales_order_id) as num_order ,
        sum(pso.total_price) as nmv,
       sum(COALESCE(f.wac_p,0) * pso.purchased_item_count * pso.basic_unit_count) as cogs,
	   (nmv-cogs)/nmv as bm


FROM product_sales_order pso
JOIN sales_orders so ON so.id = pso.sales_order_id
JOIN products on products.id=pso.product_id
JOIN brands on products.brand_id = brands.id
JOIN categories ON products.category_id = categories.id
JOIN finance.all_cogs f  ON f.product_id = pso.product_id
                        AND f.from_date::date <= so.created_at::date
                        AND f.to_date::date > so.created_at::date
JOIN product_units ON product_units.id = products.unit_id
JOIN materialized_views.retailer_polygon on materialized_views.retailer_polygon.retailer_id=so.retailer_id
JOIN districts on districts.id=materialized_views.retailer_polygon.district_id
JOIN cities on cities.id=districts.city_id
join states on states.id=cities.state_id
join regions on regions.id=states.region_id
join stocks s on s.product_id = pso.product_id and s.region = case when regions.id = 2 then cities.name_en else regions.name_en end

WHERE   True
    AND so.created_at::date between date_trunc('month',current_date - interval '2 months') and CURRENT_date-1
    AND so.sales_order_status_id not in (7,12)
    AND so.channel IN ('telesales','retailer')
    AND pso.purchased_item_count <> 0

GROUP BY ALL
)
)
)
)
where cumulative_sum <= thres
qualify num_skus <= 5
),
main as (
SELECT  DISTINCT
		so.created_at::date as date,
		parent_sales_order_id,
		so.retailer_id,
		case when regions.id = 2 then cities.name_en else regions.name_en end as region,
		pso.product_id,
		CONCAT(products.name_ar,' ',products.size,' ',product_units.name_ar) as sku,
		packing_unit_id,
		brands.name_ar as brand,
		categories.name_ar as cat,
		sum(pso.purchased_item_count) as qty,
        sum(pso.total_price) as nmv,
       sum(COALESCE(f.wac_p,0) * pso.purchased_item_count * pso.basic_unit_count) as cogs,
	   (nmv-cogs)/nmv as bm


FROM product_sales_order pso
JOIN sales_orders so ON so.id = pso.sales_order_id
JOIN products on products.id=pso.product_id
JOIN brands on products.brand_id = brands.id
JOIN categories ON products.category_id = categories.id
JOIN finance.all_cogs f  ON f.product_id = pso.product_id
                        AND f.from_date::date <= so.created_at::date
                        AND f.to_date::date > so.created_at::date
JOIN product_units ON product_units.id = products.unit_id
JOIN materialized_views.retailer_polygon on materialized_views.retailer_polygon.retailer_id=so.retailer_id
JOIN districts on districts.id=materialized_views.retailer_polygon.district_id
JOIN cities on cities.id=districts.city_id
join states on states.id=cities.state_id
join regions on regions.id=states.region_id
join selected_skus ss on ss.product_id = pso.product_id and ss.region = case when regions.id = 2 then cities.name_en else regions.name_en end

WHERE   True
    AND so.created_at::date between date_trunc('month',current_date - interval '2 months') and CURRENT_date-1
    AND so.sales_order_status_id not in (7,12)
    AND so.channel IN ('telesales','retailer')
    AND pso.purchased_item_count <> 0

GROUP BY ALL
),
region_data as (
select region,product_id,sku,brand,cat,packing_unit_id,
PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY qty) AS region_q1,
MEDIAN(qty) as region_median,
PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY qty) AS region_q3,
PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY qty) AS region_95,
STDDEV_POP(qty) as std
from main
group by all
),
recent_region_data as (
select region,product_id,sku,brand,cat,packing_unit_id,
MEDIAN(qty) as recent_region_median,
PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY qty) AS recent_region_q3,
STDDEV_POP(qty) as recent_std
from main
where date between current_date - 8 and current_date - 1
group by all

),
 freq_table AS (
  SELECT
  	region,
    PRODUCT_ID,sku,brand,cat,
	packing_unit_id,
    qty,
    COUNT(distinct parent_sales_order_id) AS freq
  FROM main
  GROUP BY all
),
lag_lead AS (
  SELECT
  region,
    PRODUCT_ID,sku,brand,cat,
	packing_unit_id,
    qty,
    freq,
    LAG(freq) OVER (PARTITION BY region,PRODUCT_ID,packing_unit_id ORDER BY qty) AS prev_freq,
    LEAD(freq) OVER (PARTITION BY region,PRODUCT_ID,packing_unit_id ORDER BY qty) AS next_freq
  FROM freq_table
),
most_freq as (
select *
from (
select *,max(cntrb)over(partition by product_id,packing_unit_id,region) as max_cntrb
from (
SELECT *, freq/sum(freq) over(partition by product_id,packing_unit_id,region) as cntrb
FROM lag_lead ll
WHERE (freq > COALESCE(prev_freq, -1))
  AND (freq > COALESCE(next_freq, -1))
  )
  )
  where cntrb >= max_cntrb- 0.05
  order by product_id
),
most_qty as (
select region,product_id,sku,cat,brand,packing_unit_id,ceil(sum(freq_cntrb*qty)) as final_qty
from (
select *,freq/sum(freq)over(partition by  product_id,packing_unit_id,region) as freq_cntrb
from most_freq
)
group by all
),
final_data as (
select *,
ceil(GREATEST(
      recent_region_median + 0.75 * recent_std,
      final_qty,
	  region_median+0.75*std,
	  region_median+2,
	  2
    )) as tier_1,
 ceil(GREATEST(
      final_qty + 0.5 * std,
      region_q3 + 0.5 * std,
	  region_95 + 0.3*std,
      recent_region_q3 + 0.5 * recent_std,
	  tier_1*1.3
    )) as tier_2
from (
select  mq.*,region_q1,
region_median,
region_q3,
region_95,
std,
COALESCE(recent_region_median,0) as recent_region_median,
COALESCE(recent_region_q3,0) as recent_region_q3,
COALESCE(recent_std,0) as recent_std
from region_data rd
join most_qty mq on rd.region =mq.region
and rd.product_id =  mq.product_id
and rd.packing_unit_id = mq.packing_unit_id
left join recent_region_data rrd on rrd.region =mq.region
and rrd.product_id =  mq.product_id
and rrd.packing_unit_id = mq.packing_unit_id
)
),
prices as (
SELECT  case when cpu.cohort_id in (700,695) then 'Cairo'
             when cpu.cohort_id in (701,695) then 'Giza'
             when cpu.cohort_id in (704,698) then 'Delta East'
             when cpu.cohort_id in (703,697) then 'Delta West'
             when cpu.cohort_id in (696) then 'Upper Egypt'
             when cpu.cohort_id in (702,699) then 'Alexandria'
        end as region,
        pu.product_id,
		pu.packing_unit_id as packing_unit_id,
		pu.basic_unit_count,
        avg(cpu.price) as price
FROM    cohort_product_packing_units cpu
join    PACKING_UNIT_PRODUCTS pu on pu.id = cpu.product_packing_unit_id
WHERE   cpu.cohort_id in (700,701,702,703,704,696,695,698,697,699)
    and cpu.created_at::date<>'2023-07-31'
    and cpu.is_customized = true
	group by all
),
elast as (
with fill_rate as (



WITH
  whs AS (
    SELECT
      *
    FROM
      (
        VALUES
          ('Cairo', 'El-Marg', 38),
          ('Cairo', 'Mostorod', 1),
          ('Giza', 'Barageel', 236),
          ('Giza', 'Basatin', 39),
          ('Delta West', 'El-Mahala', 337),
          ('Delta West', 'Tanta', 8),
          ('Delta East', 'Mansoura FC', 339),
          ('Delta East', 'Sharqya', 170),
          ('Upper Egypt', 'Assiut FC', 501),
          ('Upper Egypt', 'Bani sweif', 401),
          ('Upper Egypt', 'Menya Samalot', 703),
          ('Upper Egypt', 'Sohag', 632),
          ('Alexandria', 'Khorshed Alex', 797)
      ) x (region, wh, warehouse_id)
  ),
  active_skus AS (
    SELECT
      CASE
        WHEN regions.name_en = 'Greater Cairo' THEN cities.name_en
        ELSE regions.name_en
      END AS region,
      pso.product_id AS product_id,
      sum(pso.total_price) AS nmv
    FROM
      sales_orders so
      JOIN product_sales_order pso ON so.id = pso.sales_order_id
      JOIN materialized_views.retailer_polygon ON materialized_views.retailer_polygon.retailer_id = so.retailer_id
      JOIN districts ON districts.id = materialized_views.retailer_polygon.district_id
      JOIN cities ON cities.id = districts.city_id
      JOIN states ON states.id = cities.state_id
      JOIN regions ON regions.id = states.region_id
    WHERE
      so.created_at::DATE >= DATE_TRUNC('month', CURRENT_DATE - INTERVAL '3 month')
      AND sales_order_status_id NOT IN (7, 12)
      AND so.channel IN ('telesales', 'retailer')
      AND pso.PURCHASED_ITEM_COUNT > 0
    GROUP BY
      ALL
    HAVING
      nmv > 0
  ),
  weighted_fill_rate AS (
    SELECT
      DAY,
      product_id,
      region,
      1 - (oos / total) AS fill_rate
    FROM
      (
        SELECT
          timestamp::DATE AS DAY,
          whs.region,
          categories.name_ar AS cat,
          brands.name_ar AS brand,
          products.id AS product_id,
          nmv,
          count(
            CASE
              WHEN ss.activation = FALSE
              OR ss.available_stock = 0 THEN timestamp
            END
          ) AS oos,
          count(timestamp) AS total
        FROM
          materialized_views.STOCK_SNAP_SHOTS_RECENT ss
          JOIN whs ON whs.warehouse_id = ss.warehouse_id
          JOIN products ON products.id = ss.product_id
          JOIN product_units ON product_units.id = products.unit_id
          JOIN categories ON categories.id = products.category_id
          JOIN brands ON brands.id = products.brand_id
          JOIN active_skus ON active_skus.region = whs.region
          AND active_skus.product_id = ss.product_id
        WHERE
          ss.timestamp::DATE >= DATE_TRUNC('month', CURRENT_DATE - INTERVAL '3 months')
        GROUP BY
          ALL
      )
  ),
  prs_in AS (
    WITH
      prs AS (
        SELECT DISTINCT
          purchased_receipts.date::DATE AS pr_date,
          max(date_part('hour', purchased_receipts.date)) AS HOUR,
          products.id AS product_id,
          CONCAT(
            products.name_ar,
            ' ',
            products.size,
            ' ',
            product_units.name_ar
          ) AS sku,
          brands.name_ar AS Brand,
          categories.name_ar AS category,
          purchased_receipts.warehouse_id AS warehouse_id,
          warehouses.name AS warehouse,
          sum(
            product_purchased_receipts.purchased_item_count * product_purchased_receipts.basic_unit_count
          ) AS purchase_min_count,
        FROM
          product_purchased_receipts
          LEFT JOIN products ON products.id = product_purchased_receipts.product_id
          LEFT JOIN packing_unit_products ON packing_unit_products.product_id = products.id
          LEFT JOIN purchased_receipts ON purchased_receipts.id = product_purchased_receipts.purchased_receipt_id
          LEFT JOIN purchased_receipt_statuses ON purchased_receipt_statuses.id = purchased_receipts.purchased_receipt_status_id
          LEFT JOIN packing_units ON packing_units.id = product_purchased_receipts.packing_unit_id
          LEFT JOIN product_units ON products.unit_id = product_units.id
          LEFT JOIN suppliers ON suppliers.id = purchased_receipts.supplier_id
          LEFT JOIN brands ON brands.id = products.brand_id
          LEFT JOIN categories ON categories.id = products.category_id
          LEFT JOIN warehouses ON warehouses.id = purchased_receipts.warehouse_id
        WHERE
          product_purchased_receipts.purchased_item_count <> 0
          AND purchased_receipts.purchased_receipt_status_id IN (4, 5, 7)
          AND purchased_receipts.date::DATE >= date_trunc('month', CURRENT_DATE - interval '3 months')
          --  AND purchased_receipts.is_actual = 'true'
        GROUP BY
          ALL
      ),
      sales_cntrb AS (
        SELECT
          date_part('hour', so.created_at) AS HOUR,
          pso.warehouse_id AS warehouse_id,
          sum(pso.total_price) AS nmv
        FROM
          sales_orders so
          JOIN product_sales_order pso ON so.id = pso.sales_order_id
        WHERE
          so.created_at::DATE BETWEEN CURRENT_DATE -30 AND CURRENT_DATE  -1
          AND sales_order_status_id NOT IN (7, 12)
          AND pso.purchased_item_count <> 0
          AND channel <> 'admin'
        GROUP BY
          ALL
      ),
      whs AS (
        SELECT
          *
        FROM
          (
            VALUES
              ('Cairo', 'El-Marg', 38),
              ('Cairo', 'Mostorod', 1),
              ('Giza', 'Barageel', 236),
              ('Giza', 'Basatin', 39),
              ('Delta West', 'El-Mahala', 337),
              ('Delta West', 'Tanta', 8),
              ('Delta East', 'Mansoura FC', 339),
              ('Delta East', 'Sharqya', 170),
              ('Upper Egypt', 'Assiut FC', 501),
              ('Upper Egypt', 'Bani sweif', 401),
              ('Upper Egypt', 'Menya Samalot', 703),
              ('Upper Egypt', 'Sohag', 632),
              ('Alexandria', 'Khorshed Alex', 797)
          ) x (region, wh, warehouse_id)
      )
    SELECT
      pr_date,
      product_id,
      sku,
      region,
      round(sum((1 - cntrb) * purchase_min_count), 0) AS in_stocks
    FROM
      (
        SELECT
          x.pr_date,
          x.hour,
          x.product_id,
          x.sku,
          x.warehouse_id,
          x.purchase_min_count,
          x.cntrb,
          region
        FROM
          (
            SELECT
              prs.*,
              sum(
                CASE
                  WHEN sc.hour < prs.hour THEN nmv
                END
              ) / sum(nmv) AS cntrb
            FROM
              prs
              LEFT JOIN sales_cntrb sc ON prs.warehouse_id = sc.warehouse_id
            GROUP BY
              ALL
          ) x
          JOIN whs ON whs.warehouse_id = x.warehouse_id
      )
    GROUP BY
      ALL
  )
        SELECT
          DATE,
          product_id,
          cat,
          brand,
          region,
          day_stocks,
          fill_rate
        FROM
          (
            SELECT
              *,
              coalesce(
                CASE
                  WHEN opening_stocks = 0
                  AND closing_stocks > 0 and fill_rate is not null THEN fill_rate * closing_stocks
                  WHEN closing_stocks > opening_stocks
                  AND in_stocks IS NOT NULL THEN opening_stocks + in_stocks
                  ELSE opening_stocks
                END,
                0
              ) AS day_stocks,
            FROM
              (
                SELECT
                  x.*,
                  coalesce(wfr.fill_rate, 0) AS fill_rate,
                  wac_p,
                  pin.in_stocks
                FROM
                  (
                    SELECT
                      *,
                      lag(closing_stocks) over (
                        PARTITION BY
                          product_id,
                          region
                        ORDER BY
                          DATE
                      ) AS opening_stocks
                    FROM
                      (
                        SELECT
                          sdc.timestamp::DATE AS DATE,
                          sdc.product_id,
                          c.name_ar AS cat,
                          b.name_ar AS brand,
                          whs.region,
                          sum(sdc.AVAILABLE_STOCK) AS closing_stocks
                        FROM
                          MATERIALIZED_VIEWS.STOCK_DAY_CLOSE sdc
                          JOIN whs ON whs.warehouse_id = sdc.warehouse_id
                          JOIN products p ON p.id = sdc.product_id
                          JOIN brands b ON b.id = p.BRAND_ID
                          JOIN categories c ON c.id = p.category_id
                        WHERE
                          timestamp::DATE  BETWEEN date_trunc('month',CURRENT_DATE - interval '3 months')  AND CURRENT_DATE - 1
                        GROUP BY
                          ALL
                      )
                    ORDER BY
                      DATE
                  ) x
                  LEFT JOIN weighted_fill_rate wfr ON wfr.product_id = x.product_id
                  AND x.date = wfr.day
                  AND x.region = wfr.region
                  LEFT JOIN finance.all_cogs f ON f.product_id = x.product_id
                  AND f.from_date::DATE <= x.date
                  AND f.to_date::DATE > x.date
                  LEFT JOIN prs_in pin ON pin.product_id = x.product_id
                  AND pin.region = x.region
                  AND pin.pr_date = x.date
              )
          ) z
        WHERE
          DATE BETWEEN date_trunc('month',CURRENT_DATE - interval '3 months')  AND CURRENT_DATE - 1
),
sales AS (
  select *
  from (
  select s.*,  day_stocks,
  PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY qty) OVER (partition by s.region,s.product_id ) AS q1,
  PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY qty) OVER (partition by s.region,s.product_id ) AS q3,
  q3+(1.2*(q3-q1)) as max,
	q1-(1.2*(q3-q1)) as min
  from (
    SELECT DISTINCT
	case when regions.id = 2 then cities.name_en else regions.name_en end as region,
      so.created_at::DATE AS date,
      pso.product_id,
      CONCAT(products.name_ar, ' ', products.size, ' ', product_units.name_ar) AS sku,
      brands.name_ar AS brand,
      categories.name_ar AS cat,
      SUM(pso.total_price) AS nmv,
      SUM(pso.purchased_item_count * basic_unit_count) AS qty

    FROM product_sales_order pso
    JOIN sales_orders so ON so.id = pso.sales_order_id
    JOIN products ON products.id = pso.product_id
    JOIN brands ON products.brand_id = brands.id
    JOIN categories ON products.category_id = categories.id
	JOIN    materialized_views.retailer_polygon poly ON poly.retailer_id = so.retailer_id
	JOIN    districts ON districts.id = poly.district_id
	JOIN    cities ON cities.id = districts.city_id
	JOIN    states ON states.id = cities.STATE_ID
	JOIN    regions ON regions.id = states.region_id
    -- both bounds of the COGS validity window use the order date, as in the other COGS joins
    JOIN finance.all_cogs f ON f.product_id = pso.product_id
      AND f.from_date::DATE <= so.created_at::DATE
      AND f.to_date::DATE > so.created_at::DATE
    JOIN product_units ON product_units.id = products.unit_id
    WHERE so.created_at::DATE BETWEEN date_trunc('month',CURRENT_DATE - interval '3 months')  AND CURRENT_DATE - 1
      AND so.sales_order_status_id NOT IN (7, 12)
      AND so.channel IN ('telesales', 'retailer')
      AND pso.purchased_item_count <> 0
	  -- exclude Fridays; DAYNAME yields the three-letter day abbreviation
	  AND DAYNAME(so.created_at::date) <> 'Fri'
    GROUP BY ALL
	)s
	join fill_rate fr on fr.product_id = s.product_id and fr.region = s.region and fr.date = s.date
	)
	where qty between min and max
	and qty <= day_stocks*0.95
  ),

  cohort_prices AS (
    SELECT CASE WHEN cohort_id = 700 THEN 'Cairo'
                             WHEN cohort_id = 701 THEN 'Giza'
                             WHEN cohort_id = 702 THEN 'Alexandria'
                             WHEN cohort_id = 703 THEN 'Delta West'
                             WHEN cohort_id = 704 THEN 'Delta East'
                             WHEN cohort_id = 696 THEN 'Upper Egypt'
                        ELSE cohort_id::varchar END as region,
      pup.product_id,
      cpc.price,
      cpc.created_at AS change_date,
      COALESCE(
        LEAD(cpc.created_at) OVER (PARTITION BY cohort_id, pup.product_id ORDER BY cpc.created_at),
        CURRENT_TIMESTAMP
      ) AS next_change
    FROM cohort_pricing_changes cpc
    LEFT JOIN packing_unit_products pup ON pup.id = cpc.product_packing_unit_id
    WHERE pup.is_basic_unit = 1
      AND cpc.created_at::DATE BETWEEN date_trunc('month',CURRENT_DATE - interval '4 months')  AND CURRENT_DATE - 1
	  and cpc.cohort_id in (700,701,702,703,704,696)
  )

  SELECT
    region,
    product_id,
    sku,
    cat,
    brand,
	sum(nmv) as nmv,
    SUM(elasticty * qty_cntrb) AS elas
  FROM (
      SELECT *,
        qty / SUM(qty) OVER (PARTITION BY region, product_id) AS qty_cntrb
      FROM (
        SELECT *,
          CASE
            WHEN price_change <> 0 THEN qty_change / price_change
            ELSE 0
          END AS elasticty
        FROM (
          SELECT *,
            (qty - old_qty) / old_qty AS qty_change,
            (price - old_price) / old_price AS price_change
          FROM (
            SELECT *,
              LAG(qty) OVER (PARTITION BY product_id, region ORDER BY price) AS old_qty,
              LAG(price) OVER (PARTITION BY product_id, region ORDER BY price) AS old_price
            FROM (
  			select region ,product_id,sku,brand,cat,price,avg(nmv) as nmv,avg(qty) as qty
			  from(
              SELECT s.*, cp.price
              FROM sales s
              JOIN cohort_prices cp ON cp.region = s.region
                AND s.product_id = cp.product_id
                AND s.date >= cp.change_date
                AND s.date < cp.next_change

				)
				group by all
				order by price
            )
            QUALIFY old_qty IS NOT NULL
          )
          WHERE (
            (price_change > 0 AND qty_change < 0) OR
            (price_change < 0 AND qty_change > 0)
          )
        )
      )
    )
  GROUP BY ALL
  order by nmv desc

),
ops as (
select region,product_id,cat,brand,case when nmv <> 0  then ops_cost/nmv else 0 end as ops_perc
from (
select region,product_id,cat.name_ar as cat,b.name_ar as brand, sum(NMV_EX_VAT) as nmv,sum(lm_cost+wh_cost+mm_cost) as ops_cost,
from finance.sku_costs c
join products p on p.id = c.product_id
join brands b on b.id = p.brand_id
join CATEGORies cat on cat.id = p.category_id
where month between  date_trunc('month',current_date - interval '3 month') and date_trunc('month',current_date)-1
group by all
)
group by all
)
select region,product_id,packing_unit_id,sku,cat,brand,tier_1,tier_2,price,wac,margin,ops_perc,elas,t1_change,t2_change,

least(Greatest(COALESCE((ROUND(discount_1 / 0.0005) * 0.0005),0.15*margin),0.0035),0.015) as discount_1_final,
least(Greatest(COALESCE((ROUND(discount_2 / 0.0005) * 0.0005),0.25*margin),0.005,discount_1_final+0.005),0.02) as discount_2_final

from (
select * ,
(tier_1-region_median)/region_median as t1_change,(tier_2-region_median)/region_median as t2_change,COALESCE(t1_change/(elas*-1),0.15*margin) as discount_1 , COALESCE(t2_change/(elas*-1),0.25*margin) as discount_2
from (
select fd.*,price,wac_p * basic_unit_count as wac,(price-wac)/price as margin ,elas,o.ops_perc
from final_data fd
join prices p on p.region = fd.region and p.product_id = fd.product_id and p.packing_unit_id = fd.packing_unit_id
join finance.all_cogs f on f.product_id = fd.product_id and CURRENT_TIMESTAMP between from_date and to_date
left join elast e on e.product_id = fd.product_id and e.region = fd.region
join ops o on o.product_id = fd.product_id and o.region = case when fd.region in ('Cairo','Giza') then 'Greater Cairo' when fd.region like '%Delta%' then 'Delta' else fd.region end
where margin > 0
)
)
'''
# Run the quantity-discount query and label the result columns in SELECT order.
_qd_columns = ['region','product_id','packing_unit_id','sku','cat','brand','tier_1','tier_2','price','wac','margin','ops_perc','elas','t1_change','t2_change','discount_1_final','discount_2_final']
quantity_disc_data = query_snowflake(command_string, columns = _qd_columns)

# query_snowflake prints the error and returns None on failure; stop here with a clear
# message instead of an AttributeError on the next line.
if quantity_disc_data is None:
    raise RuntimeError('quantity discount query failed; see the error printed above')

# Every column except the text ones comes back as string/object and is converted to numbers.
_qd_text_columns = ['region', 'sku', 'cat', 'brand']
_qd_numeric_columns = [col for col in _qd_columns if col not in _qd_text_columns]
quantity_disc_data[_qd_numeric_columns] = quantity_disc_data[_qd_numeric_columns].apply(pd.to_numeric)

# Drop the excluded categories; .copy() makes the result an independent frame so the
# .loc assignment below writes to it rather than to a slice of the original.
quantity_disc_data = quantity_disc_data[~quantity_disc_data['cat'].isin(['كروت شحن','مياه معدنيه','مقرمشات','شيبسي'])].copy()
# Fixed operations-cost share (3%) applied to this one category.
quantity_disc_data.loc[quantity_disc_data['cat']== 'شوكولاتة','ops_perc'] = 0.03

In [None]:
# NMV per region/product from the start of the month two months back through yesterday.
# The region expression uses cities.name_en for region id 2, matching the expression that
# produces quantity_disc_data['region'], because the two frames are later merged on
# ['region', 'product_id'].
# NOTE(review): the previous version used states.name_en here — confirm that city-level
# naming is the intended join key.
query = '''

SELECT  DISTINCT
		case when regions.id = 2 then cities.name_en else regions.name_en end as region,
		pso.product_id,
        sum(pso.total_price) as nmv

FROM product_sales_order pso
JOIN sales_orders so ON so.id = pso.sales_order_id
JOIN products on products.id=pso.product_id
JOIN brands on products.brand_id = brands.id
JOIN categories ON products.category_id = categories.id and categories.name_ar not like '%سايب%'
JOIN product_units ON product_units.id = products.unit_id 
JOIN materialized_views.retailer_polygon on materialized_views.retailer_polygon.retailer_id=so.retailer_id
JOIN districts on districts.id=materialized_views.retailer_polygon.district_id
JOIN cities on cities.id=districts.city_id
join states on states.id=cities.state_id
join regions on regions.id=states.region_id               

WHERE   so.created_at ::date between date_trunc('month',current_date - interval '2 months') and current_date -1 
    AND so.sales_order_status_id not in (7,12)
    AND so.channel IN ('telesales','retailer')
    AND pso.purchased_item_count <> 0

GROUP BY ALL
'''
sales  = query_snowflake(query, columns = ['region','product_id','nmv'])
# query_snowflake returns None after printing the error; fail with a clear message.
if sales is None:
    raise RuntimeError('sales NMV query failed; see the error printed above')
sales.product_id = pd.to_numeric(sales.product_id)
sales.nmv = pd.to_numeric(sales.nmv)

In [None]:

# Cap the SQL-computed discounts by how much margin is left after operations cost.
_net_margin = quantity_disc_data['margin'] - quantity_disc_data['ops_perc']

# Case 1: operations cost already exceeds the margin.
# Case 2: non-negative net margin, but tier-1 discount would leave less than 1 point.
# `>= 0` closes the gap at exactly zero net margin, which previously matched neither
# case and therefore escaped both caps.
cond = [_net_margin < 0,
        (_net_margin >= 0) &
    ((_net_margin - quantity_disc_data['discount_1_final']) < 0.01)
       ]
# Tier-1 caps: 15% of margin in case 1, 25% in case 2.
cho_1= [np.minimum(quantity_disc_data['discount_1_final'],quantity_disc_data['margin']*0.15),
        np.minimum(quantity_disc_data['discount_1_final'],quantity_disc_data['margin']*0.25)
       ]

# Tier-2 caps: 25% of margin in case 1, 35% in case 2.
cho_2 =[np.minimum(quantity_disc_data['discount_2_final'],quantity_disc_data['margin']*0.25),
        np.minimum(quantity_disc_data['discount_2_final'],quantity_disc_data['margin']*0.35)
       ]

# Rows matching neither case keep the SQL-computed discount.
quantity_disc_data['discount_1'] = np.select(cond,cho_1,default=quantity_disc_data['discount_1_final'])
quantity_disc_data['discount_2'] = np.select(cond,cho_2,default=quantity_disc_data['discount_2_final'])
# Floors: tier 1 never below 0.35%, tier 2 never below 0.5%.
quantity_disc_data['discount_1']=np.maximum(quantity_disc_data['discount_1'],0.0035)
quantity_disc_data['discount_2']=np.maximum(quantity_disc_data['discount_2'],0.005)

In [None]:
# Recover the regional median quantity from tier_1 and its relative change, then compare
# profit at the median with profit at each discounted tier.
def _profit_at(quantity, unit_margin):
    """Profit for `quantity` units at the listed price and the given margin share."""
    return quantity * quantity_disc_data['price'] * unit_margin

_tier_1_growth = quantity_disc_data['t1_change'] + 1
quantity_disc_data['median'] = quantity_disc_data['tier_1'] / _tier_1_growth
quantity_disc_data['profit_median'] = _profit_at(
    quantity_disc_data['median'],
    quantity_disc_data['margin'],
)
quantity_disc_data['profit_t1'] = _profit_at(
    quantity_disc_data['tier_1'],
    quantity_disc_data['margin'] - quantity_disc_data['discount_1'],
)
quantity_disc_data['profit_t2'] = _profit_at(
    quantity_disc_data['tier_2'],
    quantity_disc_data['margin'] - quantity_disc_data['discount_2'],
)

In [None]:
# Keep only SKUs whose profit strictly increases from the median to tier 1 to tier 2.
_tier_1_beats_median = quantity_disc_data['profit_median'] < quantity_disc_data['profit_t1']
_tier_2_beats_tier_1 = quantity_disc_data['profit_t1'] < quantity_disc_data['profit_t2']
quantity_disc_data = quantity_disc_data[_tier_1_beats_median & _tier_2_beats_tier_1]

In [None]:
# Drop SKUs whose margin after operations cost is at or below -1.5 points.
_margin_after_ops = quantity_disc_data['margin'] - quantity_disc_data['ops_perc']
quantity_disc_data = quantity_disc_data[_margin_after_ops > -0.015]

In [None]:
# Attach NMV per region/product (inner join) and order each region by NMV, highest first.
quantity_disc_data = (
    quantity_disc_data
    .merge(sales, on=['region', 'product_id'])
    .sort_values(['region', 'nmv'], ascending=[True, False])
)

In [None]:
# Number the rows inside each region in their current order (1-based), keep the first
# 100 per region and save the list, index included, to Excel.
_position_in_region = quantity_disc_data.groupby('region').cumcount() + 1
quantity_disc_data['row_number'] = _position_in_region
_within_top_100 = quantity_disc_data['row_number'] <= 100
quantity_disc_data = quantity_disc_data[_within_top_100]
quantity_disc_data.to_excel('quantity_disc_sku_list.xlsx')

In [None]:
# Build one upload row per SKU: each discount group is
# [product_id, packing_unit_id, tier quantity, discount in percent].
# Rows are collected in a list and turned into a DataFrame once, instead of growing the
# frame with pd.concat on every iteration.
_upload_columns = ['region','Discounts Group 1','Discounts Group 2','Description']
_upload_rows = []
# sort=False keeps regions in order of first appearance, as iterating .unique() did.
for reg, region_data in quantity_disc_data.groupby('region', sort=False):
    for r in region_data.to_dict('records'):
        q_1 = int(r['tier_1'])
        q_2 = int(r['tier_2'])
        # Discounts are stored as fractions; the upload carries percentages with 2 decimals.
        d_1 = round(r['discount_1']*100,2)
        d_2 = round(r['discount_2']*100,2)
        _upload_rows.append({
            'region': r['region'],
            'Discounts Group 1': [r['product_id'], r['packing_unit_id'], q_1, d_1],
            'Discounts Group 2': [r['product_id'], r['packing_unit_id'], q_2, d_2],
            'Description': f'{reg}_QD',
        })
# Passing columns keeps the expected layout even when there are no rows.
final_quantity_discount = pd.DataFrame(_upload_rows, columns=_upload_columns)
final_quantity_discount    

In [None]:
# Region name -> tag id, kept as a plain mapping and expanded into the two parallel
# lists used to build the lookup frame.
_tag_id_by_region = {
    'Cairo': 2807,
    'Giza': 2808,
    'Alexandria': 2809,
    'Upper Egypt': 2810,
    'Delta East': 2811,
    'Delta West': 2812,
}

Tag_def = {
    'region': list(_tag_id_by_region.keys()),
    'Tag ID': list(_tag_id_by_region.values()),
}

Tag_map = pd.DataFrame(Tag_def)

In [None]:
from datetime import datetime, timedelta
import pytz  # install with: pip install pytz

# Start/end timestamps are expressed in Cairo local time.
local_tz = pytz.timezone('Africa/Cairo')

# Read the clock once so start and end are offsets from the same instant.
now_local = datetime.now(local_tz)
time_plus_10 = (now_local + timedelta(minutes=10)).strftime('%d/%m/%Y %H:%M')
# NOTE: despite its name, this end time is 2 days and 10 minutes from now, not one week.
time_plus_1_week = (now_local + timedelta(days=2) + timedelta(minutes=10)).strftime('%d/%m/%Y %H:%M')

# Drop any 'Tag ID' left over from a previous run of this cell; without this,
# re-running the cell merges it again and yields 'Tag ID_x'/'Tag ID_y', which
# breaks the later groupby on 'Tag ID'.
# The merge is an inner join: rows whose region is not in Tag_map are dropped.
final_quantity_discount = (
    final_quantity_discount
    .drop(columns=['Tag ID'], errors='ignore')
    .merge(Tag_map, on='region')
)
final_quantity_discount['Start Date/Time'] = time_plus_10
final_quantity_discount['End Date/Time'] = time_plus_1_week

final_quantity_discount

In [None]:
# Collapse to one row per tag/description/time window; each discount column
# becomes a list of the per-row [product, packing unit, quantity, discount] lists.
upload_group_keys = ['Tag ID', 'Description', 'Start Date/Time', 'End Date/Time']
final_data = (
    final_quantity_discount
    .groupby(upload_group_keys, as_index=False)
    .agg({'Discounts Group 1': list, 'Discounts Group 2': list})
)

In [None]:
# Display the aggregated table (one row per tag and time window).
final_data

In [32]:
# Write the aggregated table to Excel, leaving out the DataFrame index column.
final_data.to_excel(excel_writer='QD_upload.xlsx', index=False)

In [33]:
# New flow: quantity discounts read from qd_data.xlsx and keyed by cohort Tag_id

In [23]:
# Load the quantity-discount sheet and normalise its column names to lower case.
qd_new = pd.read_excel(io='qd_data.xlsx')
lowercase_columns = qd_new.columns.str.lower()
qd_new.columns = lowercase_columns

In [24]:
# Fetch the dynamic tag id of each active cohort in the hard-coded id list.
# NOTE(review): 702 appears twice in the IN list. That is harmless for the
# result, but it may be a typo for another id (e.g. 701) — confirm the list.
query = '''
select  id as cohort_id,dynamic_tag_id
from cohorts 
where is_active='true'
and id in (700,702,702,703,704,1123,1124,1125,1126)
'''
# The second column (dynamic_tag_id) is exposed as 'Tag_id' in the result frame;
# presumably query_snowflake applies `columns` as the result column names — verify.
cohort_data = query_snowflake(query, columns=['cohort_id', 'Tag_id'])
# Cast both id columns to numeric types.
cohort_data['cohort_id'] = pd.to_numeric(cohort_data['cohort_id'])
cohort_data['Tag_id'] = pd.to_numeric(cohort_data['Tag_id'])

In [25]:
# Attach each cohort's Tag_id with an inner join: rows whose cohort_id is not
# in cohort_data are dropped.
qd_new = pd.merge(qd_new, cohort_data, on='cohort_id')
qd_new.columns

Index(['cohort_id', 'cohort_name', 'product_id', 'sku', 'cat', 'brand',
       'packing_unit_id', 'tier_1', 'tier_2', 'discount_1', 'discount_2',
       'Tag_id'],
      dtype='object')

In [26]:


# Build one upload row per row of qd_new, grouped by Tag_id in order of first appearance.
# Each discount group is a list: [product_id, packing_unit_id, quantity, discount %].
# Rows are collected in a plain list and the DataFrame is created once at the end;
# growing a frame with pd.concat inside the loop copies it on every iteration
# (quadratic) and starts from an empty frame, whose dtype handling in concat is deprecated.
discount_records = []
for tag in qd_new['Tag_id'].unique():
    region_data = qd_new[qd_new['Tag_id'] == tag]
    # iterrows is kept on purpose so the values placed in the lists have the
    # same types as before.
    for _, r in region_data.iterrows():
        row_tag = r['Tag_id']
        product_id = r['product_id']
        packing_unit_id = r['packing_unit_id']
        q_1 = int(r['tier_1'])
        q_2 = int(r['tier_2'])
        # Discounts are stored as fractions; convert to percent with 2 decimals.
        d_1 = round(r['discount_1'] * 100, 2)
        d_2 = round(r['discount_2'] * 100, 2)
        discount_records.append({
            'Tag ID': row_tag,
            'Discounts Group 1': [product_id, packing_unit_id, q_1, d_1],
            'Discounts Group 2': [product_id, packing_unit_id, q_2, d_2],
            'Description': f'pepsi_{row_tag}_QD',
        })

# Passing `columns` keeps the expected schema even when there are no records.
final_quantity_discount = pd.DataFrame(
    discount_records,
    columns=['Tag ID', 'Discounts Group 1', 'Discounts Group 2', 'Description'],
)

In [27]:
from datetime import datetime, timedelta
import pytz  # install with: pip install pytz

# Start/end timestamps are expressed in Cairo local time.
local_tz = pytz.timezone('Africa/Cairo')
stamp_format = '%d/%m/%Y %H:%M'

# Start is 10 minutes from now. The end is 5 days and 9 hours from now,
# despite the "1_week" in its variable name.
start_moment = datetime.now(local_tz) + timedelta(minutes=10)
end_moment = datetime.now(local_tz) + timedelta(days=5) + timedelta(hours=9)
time_plus_10 = start_moment.strftime(stamp_format)
time_plus_1_week = end_moment.strftime(stamp_format)

# Every row gets the same start/end window.
final_quantity_discount = final_quantity_discount.assign(**{
    'Start Date/Time': time_plus_10,
    'End Date/Time': time_plus_1_week,
})

final_quantity_discount

Unnamed: 0,Tag ID,Discounts Group 1,Discounts Group 2,Description,Start Date/Time,End Date/Time
0,272,"[435, 2, 3, 0.73]","[435, 2, 5, 2.24]",pepsi_272_QD,26/08/2025 12:07,31/08/2025 20:57
1,272,"[434, 2, 4, 0.73]","[434, 2, 6, 2.24]",pepsi_272_QD,26/08/2025 12:07,31/08/2025 20:57
2,272,"[126, 2, 3, 0.93]","[126, 2, 4, 2.86]",pepsi_272_QD,26/08/2025 12:07,31/08/2025 20:57
3,272,"[8915, 2, 5, 0.97]","[8915, 2, 7, 3.0]",pepsi_272_QD,26/08/2025 12:07,31/08/2025 20:57
4,271,"[140, 2, 3, 0.93]","[140, 2, 4, 2.86]",pepsi_271_QD,26/08/2025 12:07,31/08/2025 20:57
5,271,"[434, 2, 3, 0.73]","[434, 2, 5, 2.24]",pepsi_271_QD,26/08/2025 12:07,31/08/2025 20:57
6,271,"[589, 2, 4, 0.26]","[589, 2, 6, 1.06]",pepsi_271_QD,26/08/2025 12:07,31/08/2025 20:57
7,271,"[326, 2, 3, 0.93]","[326, 2, 5, 2.86]",pepsi_271_QD,26/08/2025 12:07,31/08/2025 20:57
8,271,"[126, 2, 3, 0.93]","[126, 2, 4, 2.86]",pepsi_271_QD,26/08/2025 12:07,31/08/2025 20:57
9,269,"[126, 2, 3, 0.93]","[126, 2, 5, 2.86]",pepsi_269_QD,26/08/2025 12:07,31/08/2025 20:57


In [28]:
# Collapse to one row per tag/description/time window; each discount column
# becomes a list of the per-row [product, packing unit, quantity, discount] lists.
upload_group_keys = ['Tag ID', 'Description', 'Start Date/Time', 'End Date/Time']
final_data = (
    final_quantity_discount
    .groupby(upload_group_keys, as_index=False)
    .agg({'Discounts Group 1': list, 'Discounts Group 2': list})
)

In [30]:
# Write the aggregated table to Excel. index=False matches the QD_upload.xlsx
# export: after groupby(..., as_index=False) the index is only a 0..n-1 counter
# and would otherwise be written as an extra unnamed first column.
final_data.to_excel("Pepsi_QD.xlsx", index=False)