In [None]:
%%capture

# Upgrade pip
!pip install --upgrade pip
# Connectivity
!pip install psycopg2-binary  # PostgreSQL adapter
# !pip install snowflake-connector-python  # Snowflake connector
!pip install snowflake-connector-python==3.15.0 # Snowflake connector Older Version
!pip install snowflake-sqlalchemy  # Snowflake SQLAlchemy connector
!pip install warnings # Warnings management
# !pip install pyarrow # Serialization
!pip install keyring==23.11.0 # Key management
!pip install sqlalchemy==1.4.46 # SQLAlchemy
!pip install requests # HTTP requests
!pip install boto3 # AWS SDK
# !pip install slackclient # Slack API
!pip install oauth2client # Google Sheets API
!pip install gspread==5.9.0 # Google Sheets API
!pip install gspread_dataframe # Google Sheets API
!pip install google.cloud # Google Cloud
# Data manipulation and analysis
!pip install polars
!pip install pandas==2.2.1
!pip install numpy
# !pip install fastparquet
!pip install openpyxl # Excel file handling
!pip install xlsxwriter # Excel file handling
# Linear programming
!pip install pulp
# Date and time handling
!pip install --upgrade datetime
!pip install python-time
!pip install --upgrade pytz
# Progress bar
!pip install tqdm
# Database data types
!pip install db-dtypes
# Geospatial data handling
# !pip install geopandas
# !pip install shapely
# !pip install fiona
# !pip install haversine
# Plotting

# Modeling
!pip install statsmodels
!pip install scikit-learn

!pip install import-ipynb

In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
from datetime import datetime
import calendar
import json
from datetime import date, timedelta
from oauth2client.service_account import ServiceAccountCredentials
import setup_environment_2
import importlib
import import_ipynb
import warnings
import demand_sku_cntrb
import time
warnings.filterwarnings("ignore")
importlib.reload(setup_environment_2)
setup_environment_2.initialize_env()
import gspread

/home/ec2-user/.Renviron
/home/ec2-user/service_account_key.json
/home/ec2-user/.Renviron
/home/ec2-user/service_account_key.json


In [2]:
def query_snowflake(query, columns=None):
    """Run ``query`` on Snowflake and return the result set as a DataFrame.

    Connection settings are read from the ``SNOWFLAKE_USERNAME``,
    ``SNOWFLAKE_ACCOUNT``, ``SNOWFLAKE_PASSWORD`` and ``SNOWFLAKE_DATABASE``
    environment variables; the ``COMPUTE_WH`` warehouse is selected before
    the query is executed.

    Parameters
    ----------
    query : str
        SQL text to execute.
    columns : sequence of str, optional
        Column labels for the returned frame. When omitted (or empty) the
        frame keeps pandas' default integer column labels.

    Returns
    -------
    pandas.DataFrame or None
        The fetched rows. An empty result with ``columns`` given yields an
        empty frame carrying those columns. ``None`` is returned (after the
        error is printed) when executing or fetching fails, which preserves
        the original best-effort contract for callers.
    """
    import os
    import snowflake.connector
    import numpy as np
    import pandas as pd

    con = snowflake.connector.connect(
        user=os.environ["SNOWFLAKE_USERNAME"],
        account=os.environ["SNOWFLAKE_ACCOUNT"],
        password=os.environ["SNOWFLAKE_PASSWORD"],
        database=os.environ["SNOWFLAKE_DATABASE"],
    )
    # Bind the name up front so the ``finally`` clause cannot hit an unbound
    # local when ``con.cursor()`` itself raises.
    cur = None
    try:
        cur = con.cursor()
        cur.execute("USE WAREHOUSE COMPUTE_WH")
        cur.execute(query)
        rows = cur.fetchall()
        # Rows go through ``np.array`` exactly as before so that downstream
        # dtype handling (the ``pd.to_numeric`` passes) sees the same values.
        if columns is None or len(columns) == 0:
            return pd.DataFrame(np.array(rows))
        if len(rows) == 0:
            # ``np.array([])`` is 1-D and cannot be combined with several
            # column labels; build the empty frame directly instead.
            return pd.DataFrame(columns=columns)
        return pd.DataFrame(np.array(rows), columns=columns)
    except Exception as e:
        print("Error: ", e)
        return None
    finally:
        if cur is not None:
            cur.close()
        con.close()

In [3]:
# Read the Snowflake TIMEZONE parameter; the value is interpolated into the
# CONVERT_TIMEZONE(...) calls of the pricing query further down.
query = '''
SHOW PARAMETERS LIKE 'TIMEZONE'
'''
x = query_snowflake(query)
# Column 1 of the SHOW PARAMETERS result is taken as the parameter value.
zone_to_use = x[1].to_numpy()[0]
zone_to_use

'America/Los_Angeles'

In [4]:
# Authorise against Google Sheets with the service-account key stored in the
# "prod/maxab-sheets" secret and load the current margin bounds sheet.
scope = ["https://spreadsheets.google.com/feeds",
         'https://www.googleapis.com/auth/spreadsheets',
         "https://www.googleapis.com/auth/drive.file",
         "https://www.googleapis.com/auth/drive"]
creds = ServiceAccountCredentials.from_json_keyfile_dict(json.loads(setup_environment_2.get_secret("prod/maxab-sheets")), scope)
client = gspread.authorize(creds)
min_max = client.open('Demand Based Dynamic Pricing').worksheet('min_max_margin_cohort')
min_max_df = pd.DataFrame(min_max.get_all_records())
# Convert each column to a numeric dtype where possible; columns that cannot
# be parsed are left untouched. This replaces the deprecated
# ``errors='ignore'`` option with the equivalent explicit exception handling.
for col in min_max_df.columns:
    try:
        min_max_df[col] = pd.to_numeric(min_max_df[col])
    except (ValueError, TypeError):
        pass

In [5]:
# Main market-margin query (Snowflake SQL). `zone_to_use` is interpolated so
# that CURRENT_TIMESTAMP() is converted from that timezone to Africa/Cairo.
# CTE overview, as written below:
#   whs / full_data         - hard-coded region/warehouse/cohort list crossed
#                             with products where activation = 'true'
#   MP                      - marketplace prices per basic unit, kept when
#                             least(min_price, mod_price) is within
#                             0.9x-1.3x of wac_p
#   region_mapping/final_mp - per-region marketplace prices, coalescing to the
#                             mapped fallback regions when the region has none
#   ben_soliman             - average bs_price per basic unit from
#                             savvy_mapping rows injected in the last 5 days,
#                             kept when within 0.9x-1.3x of wac_p
#   scrapped_data           - min/max/median of the latest cleaned market
#                             prices from the last 5 days
#   local_prices/live_prices/prices
#                           - cohort prices; DBDP_PRICES rows (priority 1) win
#                             over customized cohort prices (priority 2)
#   maxab_prices            - prices of packing units with basic_unit_count = 1
#   sales                   - NMV per cohort/product
#   margin_change           - blended std-dev (0.6 product, 0.3 brand,
#                             0.1 category) and average of daily margins
#   cat_brand_target/cat_target
#                           - target margins per category+brand and
#                             NMV-weighted per category
# The final SELECT derives the market-based margins and keeps rows whose
# min margin or weighted new min margin lies between lower_bound and
# upper_bound, ordered by NMV descending.
query = f'''
WITH whs as (SELECT *
             FROM   (values
                            ('Cairo', 'El-Marg', 38,700),
                            ('Cairo', 'Mostorod', 1,700),
                            ('Giza', 'Barageel', 236,701),
                            ('Delta West', 'El-Mahala', 337,703),
                            ('Delta West', 'Tanta', 8,703),
                            ('Delta East', 'Mansoura FC', 339,704),
                            ('Delta East', 'Sharqya', 170,704),
                            ('Upper Egypt', 'Assiut FC', 501,1124),
                            ('Upper Egypt', 'Bani sweif', 401,1126),
                            ('Upper Egypt', 'Menya Samalot', 703,1123),
                            ('Upper Egypt', 'Sohag', 632,1125),
                            ('Alexandria', 'Khorshed Alex', 797,702),
							('Giza', 'Sakkarah', 962,701)
							
							)
                    x(region, wh, warehouse_id,cohort_id)),
full_data as (
select products.id as product_id, region
from products , whs 
where activation = 'true'
),				

MP as (
select region,product_id,
min(min_price) as min_price,
min(max_price) as max_price,
min(mod_price) as mod_price,
min(true_min) as true_min,
min(true_max) as true_max

from (
select mp.region,mp.product_id,mp.pu_id,
min_price/BASIC_UNIT_COUNT as min_price,
max_price/BASIC_UNIT_COUNT as max_price,
mod_price/BASIC_UNIT_COUNT as mod_price,
TRUE_MIN_PRICE/BASIC_UNIT_COUNT as true_min,
TRUE_MAX_PRICE/BASIC_UNIT_COUNT as true_max
from materialized_views.marketplace_prices mp 
join packing_unit_products pup on pup.product_id = mp.product_id and pup.packing_unit_id = mp.pu_id
join finance.all_cogs f on f.product_id = mp.product_id and CURRENT_TIMESTAMP between f.from_date and f.to_date
where  least(min_price,mod_price) between wac_p*0.9 and wac_p*1.3 
)
group by all 
),
region_mapping AS (
    SELECT * 
	FROM 
	(	VALUES
        ('Delta East', 'Delta West'),
        ('Delta West', 'Delta East'),
        ('Alexandria', 'Cairo'),
        ('Alexandria', 'Giza'),
        ('Upper Egypt', 'Cairo'),
        ('Upper Egypt', 'Giza'),
		('Cairo','Giza'),
		('Giza','Cairo'),
		('Delta West', 'Cairo'),
		('Delta East', 'Cairo'),
		('Delta West', 'Giza'),
		('Delta East', 'Giza')
		)
    AS region_mapping(region, fallback_region)
),
final_mp as (
select region,product_id,
min(final_min_price) as final_min_price,
min(final_max_price) as final_max_price,
min(final_mod_price) as final_mod_price,
min(final_true_min) as final_true_min,
min(final_true_max) as final_true_max

from (
SELECT
distinct 
	w.region,
	w.product_id,
    COALESCE(m1.min_price, m2.min_price) AS final_min_price,
    COALESCE(m1.max_price, m2.max_price) AS final_max_price,
    COALESCE(m1.mod_price, m2.mod_price) AS final_mod_price,
	COALESCE(m1.true_min, m2.true_min) AS final_true_min,
	COALESCE(m1.true_max, m2.true_max) AS final_true_max,
FROM full_data w
LEFT JOIN MP m1
    ON w.region = m1.region and w.product_id = m1.product_id
JOIN region_mapping rm
    ON w.region = rm.region
LEFT JOIN MP m2
    ON rm.fallback_region = m2.region
   AND w.product_id = m2.product_id
)
where final_min_price is not null 
group by all 
),
ben_soliman as (
select z.* 
from (
select maxab_product_id as product_id,maxab_sku as sku,avg(bs_final_price) as ben_soliman_price
from (
select * , row_number()over(partition by maxab_product_id order by diff) as rnk_2
from (
select *,(bs_final_price-wac_p)/wac_p as diff_2
from (
select * ,bs_price/maxab_basic_unit_count as bs_final_price
from (
select *,row_number()over(partition by maxab_product_id,maxab_pu order by diff) as rnk 
from (
select sm.* ,max(INJECTION_DATE::date)over(partition by maxab_product_id,maxab_pu) as max_date,wac1,wac_p,abs(bs_price-(wac_p*maxab_basic_unit_count))/(wac_p*maxab_basic_unit_count) as diff 
from materialized_views.savvy_mapping sm 
join finance.all_cogs f on f.product_id = sm.maxab_product_id and current_timestamp between f.from_Date and f.to_date
where bs_price is not null 
and INJECTION_DATE::date >= CURRENT_DATE- 5
qualify INJECTION_DATE::date = max_date
)
qualify rnk = 1 
)
)
where diff_2 between -0.5 and 0.5 
)
qualify rnk_2 = 1 
)
group by all
)z 
join finance.all_cogs f on f.product_id = z.product_id and current_timestamp between f.from_Date and f.to_date

where ben_soliman_price between f.wac_p*0.9 and f.wac_p*1.3
),
scrapped_data as (
select product_id,cat,brand,region,max_date,min(MARKET_PRICE) as min_scrapped,max(MARKET_PRICE) as max_scrapped,median(MARKET_PRICE) as median_scrapped
from (
select MATERIALIZED_VIEWS.CLEANED_MARKET_PRICES.*,max(date)over(partition by region,MATERIALIZED_VIEWS.CLEANED_MARKET_PRICES.product_id,competitor) as max_date
from MATERIALIZED_VIEWS.CLEANED_MARKET_PRICES
join finance.all_cogs f on f.product_id = MATERIALIZED_VIEWS.CLEANED_MARKET_PRICES.product_id and CURRENT_TIMESTAMP between f.from_date and f.to_date 
where date>= current_date -5
and MARKET_PRICE between f.wac_p * 0.9 and wac_p*1.3
qualify date = max_date 
)
group by all 
),
local_prices as (
SELECT  case when cpu.cohort_id in (700) then 'Cairo'
             when cpu.cohort_id in (701) then 'Giza'
             when cpu.cohort_id in (704) then 'Delta East'
             when cpu.cohort_id in (703) then 'Delta West'
             when cpu.cohort_id in (1123,1124,1125,1126) then 'Upper Egypt'
             when cpu.cohort_id in (702) then 'Alexandria'
        end as region,
		cohort_id,
        pu.product_id,
		pu.packing_unit_id as packing_unit_id,
		pu.basic_unit_count,
        avg(cpu.price) as price
FROM    cohort_product_packing_units cpu
join    PACKING_UNIT_PRODUCTS pu on pu.id = cpu.product_packing_unit_id
WHERE   cpu.cohort_id in (700,701,702,703,704,1123,1124,1125,1126)
    and cpu.created_at::date<>'2023-07-31'
    and cpu.is_customized = true
	group by all 
),
live_prices as (
select region,cohort_id,product_id,pu_id as packing_unit_id,buc as basic_unit_count,NEW_PRICE as price
from materialized_views.DBDP_PRICES
where created_at = CONVERT_TIMEZONE('{zone_to_use}', 'Africa/Cairo', CURRENT_TIMESTAMP())::date
and DATE_PART('hour', CONVERT_TIMEZONE('{zone_to_use}', 'Africa/Cairo', CURRENT_TIMESTAMP())::time) BETWEEN SPLIT_PART(time_slot, '-', 1)::int AND SPLIT_PART(time_slot, '-', 2)::int
and cohort_id in (700,701,702,703,704,696,695,698,697,699,1123,1124,1125,1126)
),
prices as (
select *
from (
    SELECT *, 1 AS priority FROM live_prices
    UNION ALL
    SELECT *, 2 AS priority FROM local_prices
)
QUALIFY ROW_NUMBER() OVER (PARTITION BY region,cohort_id,product_id,packing_unit_id ORDER BY priority) = 1
),

maxab_prices as (
select region,cohort_id,product_id,price 
from prices 
where basic_unit_count = 1 
),
sales as (
SELECT  DISTINCT
		cpc.cohort_id,
		pso.product_id,
        sum(pso.total_price) as nmv
FROM product_sales_order pso
JOIN sales_orders so ON so.id = pso.sales_order_id   
join COHORT_PRICING_CHANGES cpc on cpc.id = pso.cohort_pricing_change_id
WHERE   True
    AND so.created_at::date between date_trunc('month', CONVERT_TIMEZONE('{zone_to_use}', 'Africa/Cairo', CURRENT_TIMESTAMP())::date - 90) and CONVERT_TIMEZONE('{zone_to_use}', 'Africa/Cairo', CURRENT_TIMESTAMP())::date - 1
    AND so.sales_order_status_id not in (7,12)
    AND so.channel IN ('telesales','retailer')
    AND pso.purchased_item_count <> 0
GROUP BY ALL
),
margin_change as (
select product_id,cohort_id,(0.6*product_std) +(0.3*brand_std) + (0.1*cat_std) as std,avg_margin
from (
select product_id,cohort_id,stddev(product_margin) as product_std , stddev(brand_margin) as brand_std,stddev(cat_margin) as cat_std,avg(product_margin) as avg_margin
from (
select distinct product_id,order_date,cohort_id,(nmv-cogs_p)/nmv as product_margin,(brand_nmv-brand_cogs)/brand_nmv as brand_margin,(cat_nmv-cat_cogs)/cat_nmv as cat_margin
from(
SELECT  DISTINCT
		so.created_at::date as order_date,
		cpc.cohort_id,
		pso.product_id,
		brands.name_ar as brand, 
		categories.name_ar as cat,
       sum(COALESCE(f.wac_p,0) * pso.purchased_item_count * pso.basic_unit_count) as cogs_p,
    	sum(pso.total_price) as nmv,
		sum(nmv) over(partition by order_date,cat,brand) as brand_nmv,
		sum(cogs_p) over(partition by order_date,cat,brand) as brand_cogs,
		sum(nmv) over(partition by order_date,cat) as cat_nmv,
		sum(cogs_p) over(partition by order_date,cat) as cat_cogs
FROM product_sales_order pso
JOIN sales_orders so ON so.id = pso.sales_order_id   
join COHORT_PRICING_CHANGES cpc on cpc.id = pso.cohort_pricing_change_id
JOIN products on products.id=pso.product_id
JOIN brands on products.brand_id = brands.id 
JOIN categories ON products.category_id = categories.id
JOIN finance.all_cogs f  ON f.product_id = pso.product_id
                        AND f.from_date::date <= so.created_at::date
                        AND f.to_date::date > so.created_at::date
						
WHERE  so.created_at::date between date_trunc('month', CONVERT_TIMEZONE('{zone_to_use}', 'Africa/Cairo', CURRENT_TIMESTAMP())::date - 120) and CONVERT_TIMEZONE('{zone_to_use}', 'Africa/Cairo', CURRENT_TIMESTAMP())::date
    AND so.sales_order_status_id not in (7,12)
    AND so.channel IN ('telesales','retailer')
    AND pso.purchased_item_count <> 0
GROUP BY ALL
)
)

group by all 
)
),
cat_brand_target as (
SELECT DISTINCT cat, brand, margin as target_bm
FROM    performance.commercial_targets cplan
QUALIFY CASE WHEN DATE_TRUNC('month', MAX(DATE)OVER()) = DATE_TRUNC('month', CONVERT_TIMEZONE('{zone_to_use}', 'Africa/Cairo', CURRENT_TIMESTAMP())::date) THEN DATE_TRUNC('month', CONVERT_TIMEZONE('{zone_to_use}', 'Africa/Cairo', CURRENT_TIMESTAMP())::date)
ELSE DATE_TRUNC('month', CONVERT_TIMEZONE('{zone_to_use}', 'Africa/Cairo', CURRENT_TIMESTAMP())::date - INTERVAL '1 month') END = DATE_TRUNC('month', date)
),
cat_target as (

select cat,sum(target_bm *(target_nmv/cat_total)) as cat_target_margin
from (
select *,sum(target_nmv)over(partition by cat) as cat_total
from (
select cat,brand,avg(target_bm) as target_bm , sum(target_nmv) as target_nmv
from (
SELECT DISTINCT date,city as region,cat, brand, margin as target_bm,nmv as target_nmv
FROM    performance.commercial_targets cplan
QUALIFY CASE WHEN DATE_TRUNC('month', MAX(DATE)OVER()) = DATE_TRUNC('month', CONVERT_TIMEZONE('{zone_to_use}', 'Africa/Cairo', CURRENT_TIMESTAMP())::date) THEN DATE_TRUNC('month', CONVERT_TIMEZONE('{zone_to_use}', 'Africa/Cairo', CURRENT_TIMESTAMP())::date)
ELSE DATE_TRUNC('month', CONVERT_TIMEZONE('{zone_to_use}', 'Africa/Cairo', CURRENT_TIMESTAMP())::date - INTERVAL '1 month') END = DATE_TRUNC('month', date)
)
group by all
)
)
group by all 
)

select cohort_id,product_id,sku,cat,brand,
case when min_status = 1 and new_min_status = 1 then  min_margin when min_status = 0 and new_min_status = 1 then new_min_margin else min_margin end as min_margin,
case when new_min_status = 0 then min_margin else new_min_margin end as new_min_margin,
avg_market_margin,
new_avg_market_margin,
target_margin,
current_margin
from (
select *,
(all_mp_mins-wac_p)/all_mp_mins as min_margin,
(new_min-wac_p)/new_min as new_min_margin,
(average_price-wac_p)/average_price as avg_market_margin,
(average_new-wac_p)/average_new as new_avg_market_margin,
(maxab_price-wac_p)/maxab_price as current_margin,
greatest(least(current_margin - (1.5*std) , avg_margin - (0.5*std),target_margin-(2*std)),0.005) as lower_bound,
greatest(current_margin + (3*std) , avg_margin + (4*std),target_margin+(3*std)) as upper_bound,
case when min_margin between lower_bound and upper_bound then 1 else 0 end as min_status,
case when new_min_margin between lower_bound and upper_bound then 1 else 0 end as new_min_status

from (
select distinct maxab.cohort_id,
maxab.product_id,
CONCAT(products.name_ar,' ',products.size,' ',product_units.name_ar) as sku,
brands.name_ar as brand, 
categories.name_ar as cat,
maxab.price as maxab_price,bs.ben_soliman_price,
final_min_price,
final_max_price,
final_mod_price,
min_scrapped,
median_scrapped,
max_scrapped,
wac_p,
NULLIF(
    LEAST(
        CASE WHEN final_min_price <> 0 AND final_min_price IS NOT NULL 
            THEN final_min_price ELSE 999999999 END,
        CASE WHEN final_mod_price <> 0 AND final_mod_price IS NOT NULL 
            THEN final_mod_price ELSE 999999999 END,
        CASE WHEN ben_soliman_price <> 0 AND ben_soliman_price IS NOT NULL 
            THEN ben_soliman_price ELSE 999999999 END,
        CASE WHEN min_scrapped <> 0 AND min_scrapped IS NOT NULL 
            THEN min_scrapped ELSE 999999999 END
    ), 
    999999999
) AS all_mp_mins,

(
    COALESCE(ben_soliman_price, 0) * 0.4 * (CASE WHEN ben_soliman_price IS NOT NULL AND ben_soliman_price <> 0 THEN 1 ELSE 0 END) + 
    COALESCE(NULLIF(LEAST(
        CASE WHEN final_min_price <> 0 AND final_min_price IS NOT NULL 
            THEN final_min_price ELSE 999999999 END,
        CASE WHEN final_mod_price <> 0 AND final_mod_price IS NOT NULL 
            THEN final_mod_price ELSE 999999999 END
    ), 999999999), 0) * 0.25 * (CASE WHEN (final_min_price IS NOT NULL AND final_min_price <> 0) 
                                          OR (final_mod_price IS NOT NULL AND final_mod_price <> 0) THEN 1 ELSE 0 END) + 
    COALESCE(min_scrapped, 0) * 0.35 * (CASE WHEN min_scrapped IS NOT NULL AND min_scrapped <> 0 THEN 1 ELSE 0 END)
) / NULLIF(
    (0.4 * (CASE WHEN ben_soliman_price IS NOT NULL AND ben_soliman_price <> 0 THEN 1 ELSE 0 END) +
     0.25 * (CASE WHEN (final_min_price IS NOT NULL AND final_min_price <> 0) 
                   OR (final_mod_price IS NOT NULL AND final_mod_price <> 0) THEN 1 ELSE 0 END) +
     0.35 * (CASE WHEN min_scrapped IS NOT NULL AND min_scrapped <> 0 THEN 1 ELSE 0 END)),
    0
) AS new_min,

    (COALESCE((final_min_price + final_max_price) / 2, 0) + 
     COALESCE(ben_soliman_price, 0) + 
     COALESCE(final_mod_price, 0)+
	 coalesce(median_scrapped,0)
	 ) / 
    NULLIF(
        (CASE WHEN (final_min_price + final_max_price) / 2 IS NOT NULL and final_min_price >0 and final_max_price >0 THEN 1 ELSE 0 END +
         CASE WHEN ben_soliman_price IS NOT NULL and ben_soliman_price >0  THEN 1 ELSE 0 END +
		 CASE WHEN median_scrapped IS NOT NULL and median_scrapped >0  THEN 1 ELSE 0 END +
         CASE WHEN final_mod_price IS NOT NULL and final_mod_price > 0 THEN 1 ELSE 0 END), 
        0
    ) AS average_price,
	(
    COALESCE((final_min_price + final_max_price) / 2, 0) * 0.25 * 
        (CASE WHEN final_min_price > 0 AND final_max_price > 0 THEN 1 ELSE 0 END) + 
    COALESCE(ben_soliman_price, 0) * 0.4 * 
        (CASE WHEN ben_soliman_price IS NOT NULL AND ben_soliman_price > 0 THEN 1 ELSE 0 END) + 
    COALESCE(final_mod_price, 0) * 0.25 * 
        (CASE WHEN final_mod_price IS NOT NULL AND final_mod_price > 0 THEN 1 ELSE 0 END) +
    COALESCE(median_scrapped, 0) * 0.1 * 
        (CASE WHEN median_scrapped IS NOT NULL AND median_scrapped > 0 THEN 1 ELSE 0 END)
) / 
NULLIF(
    (0.25 * (CASE WHEN final_min_price > 0 AND final_max_price > 0 THEN 1 ELSE 0 END) +
     0.4 * (CASE WHEN ben_soliman_price IS NOT NULL AND ben_soliman_price > 0 THEN 1 ELSE 0 END) +
     0.25 * (CASE WHEN final_mod_price IS NOT NULL AND final_mod_price > 0 THEN 1 ELSE 0 END) +
     0.1 * (CASE WHEN median_scrapped IS NOT NULL AND median_scrapped > 0 THEN 1 ELSE 0 END)),
    0
) AS average_new,
	
coalesce(nmv,0) as nmv,
coalesce(mc.std,0.01) as std,
coalesce(coalesce(cbt.target_bm , ct.cat_target_margin),0) as target_margin,
coalesce(avg_margin,0) as avg_margin

from maxab_prices maxab
left join ben_soliman bs on bs.product_id = maxab.product_id
left join final_mp fmp on fmp.product_id = maxab.product_id and fmp.region = maxab.region
left join sales s on s.product_id = maxab.product_id and s.cohort_id = maxab.cohort_id
left join scrapped_data  sd on sd.product_id = maxab.product_id and sd.region = maxab.region
join finance.all_cogs f on f.product_id = maxab.product_id and CONVERT_TIMEZONE('{zone_to_use}', 'Africa/Cairo', CURRENT_TIMESTAMP()) between f.from_date and f.to_date
JOIN products on products.id=maxab.product_id
JOIN brands on products.brand_id = brands.id 
JOIN categories ON products.category_id = categories.id
JOIN product_units ON product_units.id = products.unit_id 
left join margin_change mc on mc.product_id = maxab.product_id and mc.cohort_id = maxab.cohort_id
left join cat_brand_target cbt on cbt.brand = brands.name_ar and cbt.cat = categories.name_ar 
left join cat_target ct on ct.cat = categories.name_ar 
)
where all_mp_mins is not null 
and (min_status = 1 or new_min_status = 1)
)
order by NMV desc
'''
# Execute the market-margin query. The SQL's min_margin / new_avg_market_margin
# outputs are labelled min_market_margin / new_avg_margin here.
market_data   = query_snowflake(query, columns = ['cohort_id','product_id','sku','cat','brand','min_market_margin','new_min_margin','avg_market_margin','new_avg_margin','target_margin','current_margin'])
market_data.columns = market_data.columns.str.lower()
# Convert each column to a numeric dtype where possible; text columns are left
# as they are. This replaces the deprecated ``errors='ignore'`` option with
# the equivalent explicit exception handling.
for col in market_data.columns:
    try:
        market_data[col] = pd.to_numeric(market_data[col])
    except (ValueError, TypeError):
        pass
market_data = market_data[['cohort_id','product_id','min_market_margin','new_min_margin','avg_market_margin','new_avg_margin','target_margin','current_margin']]
# Keep only rows whose market-derived margins are strictly positive.
market_data = market_data[(market_data['min_market_margin'] > 0)&(market_data['avg_market_margin'] > 0) ]
# One row per cohort/product; the query orders by NMV descending, so the
# first occurrence is the one kept.
market_data=market_data.drop_duplicates(subset=['cohort_id','product_id'])

In [6]:
# Load every row of the DBDP_PRICE_UPS view (region, product, new_pp, date).
query ='''
select region , product_id,new_pp,forecasted_date
from materialized_views.DBDP_PRICE_UPS
'''
# NOTE: the last label is spelled 'forcasted_date' (sic); it is kept as-is so
# the resulting column name does not change.
price_ups  = query_snowflake(query, columns = ['region','product_id','new_pp','forcasted_date'])
price_ups.columns = price_ups.columns.str.lower()
# Convert each column to a numeric dtype where possible; other columns are
# left untouched. This replaces the deprecated ``errors='ignore'`` option
# with the equivalent explicit exception handling.
for col in price_ups.columns:
    try:
        price_ups[col] = pd.to_numeric(price_ups[col])
    except (ValueError, TypeError):
        pass

In [7]:
# NMV per cohort/product (with sku, brand and category labels) for orders
# created between current_date - 60 and current_date - 1, restricted to the
# listed cohorts, channels and order statuses.
query = '''
SELECT  DISTINCT
		cpc.cohort_id,  
		pso.product_id,
		CONCAT(products.name_ar,' ',products.size,' ',product_units.name_ar) as sku,
		brands.name_ar as brand, 
		categories.name_ar as cat,
        sum(pso.total_price) as nmv


FROM product_sales_order pso
JOIN sales_orders so ON so.id = pso.sales_order_id
join COHORT_PRICING_CHANGES cpc on cpc.id = pso.COHORT_PRICING_CHANGE_id
JOIN products on products.id=pso.product_id
JOIN brands on products.brand_id = brands.id 
JOIN categories ON products.category_id = categories.id
JOIN product_units ON product_units.id = products.unit_id 
          

WHERE   True
    AND so.created_at::date between  current_date - 60 and current_date -1 
    AND so.sales_order_status_id not in (7,12)
    AND so.channel IN ('telesales','retailer')
    AND pso.purchased_item_count <> 0
    and cpc.cohort_id in (700,701,702,703,704,1123,1124,1125,1126)

GROUP BY ALL
'''
sales  = query_snowflake(query, columns = ['cohort_id','product_id','sku','brand','cat','nmv'])
sales.columns = sales.columns.str.lower()
# Convert each column to a numeric dtype where possible; text columns are left
# untouched. This replaces the deprecated ``errors='ignore'`` option with the
# equivalent explicit exception handling.
for col in sales.columns:
    try:
        sales[col] = pd.to_numeric(sales[col])
    except (ValueError, TypeError):
        pass

In [None]:
def assign_tier(cumulative_contribution):
    """Map a cumulative contribution value to a tier number from 1 to 4.

    The cut-offs are 40, 70 and 90 (upper bounds inclusive), i.e. the input
    is compared on a 0-100 scale. Anything above 90, or any value for which
    every comparison is false (such as NaN), falls into tier 4.
    """
    for tier, upper_bound in enumerate((40, 70, 90), start=1):
        if cumulative_contribution <= upper_bound:
            return tier
    return 4

In [None]:
# Rank products inside each cohort by NMV and assign a contribution tier.
sales['total_nmv'] = sales.groupby('cohort_id')['nmv'].transform('sum')
# Share of the cohort's NMV, as a fraction between 0 and 1.
sales['cntrb_nmv'] = sales['nmv']/sales['total_nmv']
sales = sales.sort_values(['cohort_id', 'nmv'], ascending=[True, False])
# Running share within the cohort, largest products first (still a fraction).
sales['nmv_cumulative_cntrb'] = sales.groupby('cohort_id')['cntrb_nmv'].cumsum()
# assign_tier compares against 40 / 70 / 90, so the fraction is scaled to a
# percentage first; without the scaling every product would land in tier 1.
sales['tier'] = (sales['nmv_cumulative_cntrb'] * 100).apply(assign_tier)

In [None]:
# Recompute margin bounds for products that exist both in the margin sheet
# (min_max_df) and in the market data, limited to products with a sales tier.
found = min_max_df.merge(market_data,on=['cohort_id','product_id'])
found = found.merge(sales[['cohort_id','product_id','tier']],on=['cohort_id','product_id'])
found['new_min'] = found['min_market_margin']
found['min_change'] = (found['new_min_margin']-found['min_market_margin'])/found['min_market_margin']

# When the current margin is at or above the market minimum, the floor is
# picked per tier: 1 -> market min, 2 -> weighted min, 3 -> market average,
# 4 -> weighted average.
above_market_min = found['current_margin']>=found['min_market_margin']
found.loc[above_market_min&(found['tier'] == 1),'new_min'] = found['min_market_margin']
found.loc[above_market_min&(found['tier'] == 2),'new_min'] = found['new_min_margin']
found.loc[above_market_min&(found['tier'] == 3),'new_min'] = found['avg_market_margin']
found.loc[above_market_min&(found['tier'] == 4),'new_min'] = found['new_avg_margin']

# If the floor is still the market minimum but the weighted minimum is at
# least 50% higher, use the weighted minimum. The comparison is parenthesised
# so that `>=` applies to min_change only; the original right-hand side had
# `&(min_change)>=0.5`, which evaluates `(mask & min_change) >= 0.5`.
large_min_jump = (found['new_min'] == found['min_market_margin'])&(found['min_change']>=0.5)
found.loc[large_min_jump,'new_min'] = found['new_min_margin']
found.loc[found['current_margin']<0,'new_min'] = found['current_margin']

# Never go below the market minimum margin.
found.loc[found['new_min'] < found['min_market_margin'],'new_min'] = found['min_market_margin']

# Relative width of the existing min/max band from the sheet.
found['diff'] = (found['max_margin'] - found['min_margin'])/found['min_margin']

# New max = new min plus the scaled band, clamped to between 0.01 and 0.04.
found['new_max']= found['new_min'] + np.minimum(np.maximum((found['diff']*found['new_min']),0.01),0.04)

# No max in the sheet means no max in the output either.
found.loc[found['max_margin'].isna(),'new_max'] = np.nan

found=found[['cohort_id','product_id','new_min','new_max']]

In [None]:
# Spot check: show the market-data rows for product 201.
market_data.loc[market_data['product_id'] == 201]

In [None]:
# Products that have market data but no row in the margin sheet. The `flag`
# column marks sheet rows so that missing ones show up as NaN after the
# left merge.
min_max_df['flag']=1
not_found = market_data.merge(min_max_df[['cohort_id','product_id','flag']],on=['cohort_id','product_id'],how='left')
not_found = not_found.merge(sales[['cohort_id','product_id','tier']],on=['cohort_id','product_id'])
# .copy() so the column assignments below act on an independent frame.
not_found = not_found[not_found['flag'].isna()].copy()

not_found['new_min'] = not_found['min_market_margin']
not_found['min_change'] = (not_found['new_min_margin']-not_found['min_market_margin'])/not_found['min_market_margin']

# When the current margin is at or above the market minimum, the floor is
# picked per tier: 1 -> market min, 2 -> weighted min, 3 -> market average,
# 4 -> weighted average.
above_market_min = not_found['current_margin']>=not_found['min_market_margin']
not_found.loc[above_market_min&(not_found['tier'] == 1),'new_min'] = not_found['min_market_margin']
not_found.loc[above_market_min&(not_found['tier'] == 2),'new_min'] = not_found['new_min_margin']
not_found.loc[above_market_min&(not_found['tier'] == 3),'new_min'] = not_found['avg_market_margin']
not_found.loc[above_market_min&(not_found['tier'] == 4),'new_min'] = not_found['new_avg_margin']

# If the floor is still the market minimum but the weighted minimum is at
# least 50% higher, use the weighted minimum. The original mask was written
# `(...)&(min_change)>=0.5`; because `&` binds tighter than `>=`, it compared
# `(mask & min_change)` against 0.5 and therefore matched any non-zero
# min_change, including negative ones.
large_min_jump = (not_found['new_min'] == not_found['min_market_margin'])&(not_found['min_change']>=0.5)
not_found.loc[large_min_jump,'new_min']=not_found['new_min_margin']

# Band width: 30% of the target margin, clamped to between 0.01 and 0.04.
not_found['diff'] = np.minimum(np.maximum(0.3*not_found['target_margin'],0.01),0.04)
not_found['new_max'] = not_found['new_min']+not_found['diff'] 
not_found=not_found[['cohort_id','product_id','new_min','new_max']]

In [None]:
# Sheet rows without any market data keep their existing bounds. `flag_2`
# marks market-data rows so unmatched sheet rows show NaN after the left merge.
market_data['flag_2'] = 1
main_found = (
    min_max_df
    .merge(market_data, on=['cohort_id','product_id'], how='left')
    .loc[
        lambda merged: merged['flag_2'].isna(),
        ['cohort_id','product_id','min_margin','max_margin','enforce'],
    ]
)

In [None]:
# Stack the recomputed bounds for sheet products and for new products.
final_df = pd.concat([found,not_found],axis=0)
final_df=final_df.drop_duplicates()
# Cohort -> region lookup; Upper Egypt maps to four cohorts.
regions = pd.DataFrame({
    'region': ['Cairo', 'Giza', 'Delta West', 'Delta East', 'Upper Egypt', 
               'Upper Egypt', 'Upper Egypt', 'Upper Egypt', 'Alexandria'],
    'cohort_id': [700, 701, 703, 704, 1124, 1126, 1123, 1125, 702]
})
# The inner merge keeps only cohorts listed in `regions`; the region column
# itself is dropped again right after.
final_df=final_df.merge(regions,on=['cohort_id'])
final_df=final_df[['cohort_id','product_id','new_min','new_max']]
final_df=final_df.drop_duplicates()
final_df.columns = ['cohort_id','product_id','min_margin','max_margin']
# Append sheet rows that had no market data, with their bounds unchanged.
final_df = pd.concat([final_df,main_found[['cohort_id','product_id','min_margin','max_margin']]],axis=0)
# Expand price-up rows from region to cohort level. The guard keeps the cell
# re-runnable: merging a second time would produce cohort_id_x / cohort_id_y
# columns and make the merge below fail on the missing 'cohort_id' key.
if 'cohort_id' not in price_ups.columns:
    price_ups=price_ups.merge(regions,on=['region'])
final_df=final_df.merge(price_ups,on=['product_id','cohort_id'],how='left')

In [None]:
# Rows that matched a price-up entry (new_pp present) get a wider max margin
# and are marked enforce = 1; every other row gets enforce = NaN.
has_price_up = final_df['new_pp'].notna()
widened_max = np.minimum(final_df['max_margin'] + 0.15, final_df['min_margin'] + 0.2)
final_df.loc[has_price_up, 'max_margin'] = widened_max
final_df['enforce'] = np.where(has_price_up, 1, np.nan)
final_df = final_df.drop_duplicates()
# The inner merge with sales adds sku/brand and drops products without sales rows.
output_columns = ['cohort_id', 'product_id','sku', 'min_margin', 'max_margin', 'enforce','brand']
final_df = final_df.merge(sales, on=['cohort_id','product_id'])[output_columns]


In [None]:
# Write the final margin table to min_max_data.xlsx (path is relative, so it
# is created in the current working directory).
final_df.to_excel('min_max_data.xlsx')

In [None]:
# Show the rows that were marked enforce = 1.
final_df.loc[final_df['enforce'] == 1]

In [None]:
# Scratch cell: a bare literal that only echoes 1 and does not touch any data.
1
