In [1]:
%%capture

# Upgrade pip
!pip install --upgrade pip
# Connectivity
!pip install psycopg2-binary  # PostgreSQL adapter
# !pip install snowflake-connector-python  # Snowflake connector
!pip install snowflake-connector-python==3.15.0 # Snowflake connector Older Version
!pip install snowflake-sqlalchemy  # Snowflake SQLAlchemy connector
!pip install warnings # Warnings management
# !pip install pyarr # Serialization
!pip install keyring==23.11.0 # Key management
!pip install sqlalchemy==1.4.46 # SQLAlchemy
!pip install requests # HTTP requests
!pip install boto3 # AWS SDK
# !pip install slackclient # Slack API
!pip install oauth2client # Google Sheets API
!pip install gspread==5.9.0 # Google Sheets API
!pip install gspread_dataframe # Google Sheets API
!pip install google.cloud # Google Cloud
# Data manipulation and analysis
!pip install polars
!pip install pandas==2.2.1
!pip install numpy
# !pip install fastparquet
!pip install openpyxl # Excel file handling
!pip install xlsxwriter # Excel file handling
# Linear programming
!pip install pulp
# Date and time handling
!pip install --upgrade datetime
!pip install python-time
!pip install --upgrade pytz
# Progress bar
!pip install tqdm
# Database data types
!pip install db-dtypes
# Geospatial data handling
# !pip install geopandas
# !pip install shapely
# !pip install fiona
# !pip install haversine
# Plotting

# Modeling
!pip install statsmodels
!pip install scikit-learn

!pip install import-ipynb

In [2]:
# Imports used across the notebook, followed by the environment bootstrap.
import pandas as pd
import numpy as np
from tqdm import tqdm
from datetime import datetime
import calendar
import json
from datetime import date, timedelta
from oauth2client.service_account import ServiceAccountCredentials
import setup_environment_2
import importlib
import import_ipynb
import warnings
import time
# Silence every warning for the rest of the session (this also hides pandas deprecation warnings).
warnings.filterwarnings("ignore")
# Reload the local helper module so edits to it are picked up without restarting the kernel,
# then run its environment initialisation.
# NOTE(review): presumably initialize_env() populates the credentials/environment variables read
# later (e.g. the SNOWFLAKE_* variables) — confirm against setup_environment_2.
importlib.reload(setup_environment_2)
setup_environment_2.initialize_env()
# gspread is imported only after the environment has been initialised.
import gspread

/home/ec2-user/.Renviron
/home/ec2-user/service_account_key.json


In [3]:
# Run mode. Any value containing the substring 'min' makes the tier-adjustment cell
# shift every SKU tier down by one level (floored at 0).
status = "min_market" 

In [4]:
def query_snowflake(query, columns=None):
    """Run a SQL statement on Snowflake and return the result as a DataFrame.

    Connection settings are read from the SNOWFLAKE_USERNAME, SNOWFLAKE_ACCOUNT,
    SNOWFLAKE_PASSWORD and SNOWFLAKE_DATABASE environment variables, and the
    query runs on the COMPUTE_WH warehouse.

    Parameters
    ----------
    query : str
        SQL text to execute.
    columns : sequence of str, optional
        Column labels for the result. When omitted or empty, the DataFrame
        keeps default integer column labels.

    Returns
    -------
    pandas.DataFrame or None
        The fetched rows. Values pass through ``np.array`` first, so mixed-type
        rows come back as strings/objects and callers convert them afterwards.
        An empty result with ``columns`` given yields an empty DataFrame with
        those columns. ``None`` is returned when the query fails; the error is
        printed rather than raised.
    """
    import os
    import snowflake.connector
    import numpy as np
    import pandas as pd

    columns = [] if columns is None else columns
    con = snowflake.connector.connect(
        user=os.environ["SNOWFLAKE_USERNAME"],
        account=os.environ["SNOWFLAKE_ACCOUNT"],
        password=os.environ["SNOWFLAKE_PASSWORD"],
        database=os.environ["SNOWFLAKE_DATABASE"]
    )
    # Initialised before the try block so the cleanup below never hits an unbound name
    # when con.cursor() itself fails.
    cur = None
    try:
        cur = con.cursor()
        cur.execute("USE WAREHOUSE COMPUTE_WH")
        cur.execute(query)
        rows = cur.fetchall()
        if len(columns) == 0:
            return pd.DataFrame(np.array(rows))
        if len(rows) == 0:
            # np.array([]) is 1-D and cannot be labelled with several columns;
            # return an empty, correctly labelled frame instead of failing.
            return pd.DataFrame(columns=columns)
        return pd.DataFrame(np.array(rows), columns=columns)
    except Exception as e:
        # Best-effort behaviour kept from the original: report and return None.
        print("Error: ", e)
        return None
    finally:
        try:
            if cur is not None:
                cur.close()
        finally:
            con.close()

In [5]:
# Read the TIMEZONE parameter of the Snowflake session; it is interpolated later into a
# CONVERT_TIMEZONE(...) call when fetching current costs.
query = '''
SHOW PARAMETERS LIKE 'TIMEZONE'
'''
x  = query_snowflake(query)
# The result has default integer column labels; column 1 of the first row is taken as the
# time-zone name (the recorded output below is 'America/Los_Angeles').
zone_to_use = x[1].values[0]
zone_to_use

'America/Los_Angeles'

In [6]:
# Load the current per-cohort min/max margin ranges from Google Sheets.
scope = ["https://spreadsheets.google.com/feeds",
         'https://www.googleapis.com/auth/spreadsheets',
         "https://www.googleapis.com/auth/drive.file",
         "https://www.googleapis.com/auth/drive"]
creds = ServiceAccountCredentials.from_json_keyfile_dict(json.loads(setup_environment_2.get_secret("prod/maxab-sheets")), scope)
client = gspread.authorize(creds)
min_max = client.open('Demand Based Dynamic Pricing').worksheet('min_max_margin_cohort')
min_max_df = pd.DataFrame(min_max.get_all_records())
for col in min_max_df.columns:
    # Same result as the deprecated pd.to_numeric(..., errors='ignore'):
    # convert when possible, otherwise leave the column untouched.
    try:
        min_max_df[col] = pd.to_numeric(min_max_df[col])
    except (ValueError, TypeError):
        pass
# Keep only rows whose minimum margin is above 1%.
min_max_df = min_max_df[min_max_df['min_margin'] > 0.01]

In [7]:
# Brands listed on the 'Suppliers Brands' tab of the campaign workbook; used later to lower
# the tier of matching SKUs by one level.
Blue_FD_brands = client.open('Anniversary Campaign 2025 (Final)').worksheet('Suppliers Brands')
brand_records = Blue_FD_brands.get_all_records()
Blue_FD_brands_df = pd.DataFrame(brand_records)[['Brands']].drop_duplicates()
blue_list = list(Blue_FD_brands_df['Brands'])

In [8]:
# Main market-data query. One row per (cohort_id, product_id) combining the current MaxAB price
# with every external price reference and the margin statistics used further down.
# CTE overview:
#   whs               - region / warehouse / warehouse_id / cohort_id lookup.
#   full_data         - active products crossed with the warehouse regions.
#   MP                - marketplace min/max/mod/true prices per basic unit, kept only when
#                       least(min, mod) lies between 0.9x and 1.3x of wac_p.
#   region_mapping    - fallback regions used when a region has no marketplace price.
#   final_mp          - marketplace prices per region/product after applying the fallback.
#   ben_soliman       - Ben Soliman price per basic unit from the last 5 days, kept within
#                       0.7x-1.3x of wac_p.
#   scrapped_data     - min / max / median scraped market price from the last 5 days.
#   local_prices, live_prices, prices, maxab_prices
#                     - current selling price; a live DBDP price takes priority over the
#                       customised cohort price, and only basic-unit rows are kept.
#   sales             - NMV per cohort/product (from the month start 120 days back up to yesterday).
#   margin_change     - blended margin standard deviation (0.6 product, 0.3 brand, 0.1 category)
#                       and average product margin.
#   cat_brand_target, cat_target
#                     - target margin per category/brand and the NMV-weighted category target.
# NOTE: the f-string has no placeholders; the text is sent to Snowflake verbatim.
query = f'''
WITH whs as (SELECT *
             FROM   (values
                            ('Cairo', 'El-Marg', 38,700),
                            ('Cairo', 'Mostorod', 1,700),
                            ('Giza', 'Barageel', 236,701),
                            ('Delta West', 'El-Mahala', 337,703),
                            ('Delta West', 'Tanta', 8,703),
                            ('Delta East', 'Mansoura FC', 339,704),
                            ('Delta East', 'Sharqya', 170,704),
                            ('Upper Egypt', 'Assiut FC', 501,1124),
                            ('Upper Egypt', 'Bani sweif', 401,1126),
                            ('Upper Egypt', 'Menya Samalot', 703,1123),
                            ('Upper Egypt', 'Sohag', 632,1125),
                            ('Alexandria', 'Khorshed Alex', 797,702),
							('Giza', 'Sakkarah', 962,701)
							
							)
                    x(region, wh, warehouse_id,cohort_id)),
full_data as (
select products.id as product_id, region
from products , whs 
where activation = 'true'
),				

MP as (
select region,product_id,
min(min_price) as min_price,
min(max_price) as max_price,
min(mod_price) as mod_price,
min(true_min) as true_min,
min(true_max) as true_max

from (
select mp.region,mp.product_id,mp.pu_id,
min_price/BASIC_UNIT_COUNT as min_price,
max_price/BASIC_UNIT_COUNT as max_price,
mod_price/BASIC_UNIT_COUNT as mod_price,
TRUE_MIN_PRICE/BASIC_UNIT_COUNT as true_min,
TRUE_MAX_PRICE/BASIC_UNIT_COUNT as true_max
from materialized_views.marketplace_prices mp 
join packing_unit_products pup on pup.product_id = mp.product_id and pup.packing_unit_id = mp.pu_id
join finance.all_cogs f on f.product_id = mp.product_id and CURRENT_TIMESTAMP between f.from_date and f.to_date
where  least(min_price,mod_price) between wac_p*0.9 and wac_p*1.3 
)
group by all 
),
region_mapping AS (
    SELECT * 
	FROM 
	(	VALUES
        ('Delta East', 'Delta West'),
        ('Delta West', 'Delta East'),
        ('Alexandria', 'Cairo'),
        ('Alexandria', 'Giza'),
        ('Upper Egypt', 'Cairo'),
        ('Upper Egypt', 'Giza'),
		('Cairo','Giza'),
		('Giza','Cairo'),
		('Delta West', 'Cairo'),
		('Delta East', 'Cairo'),
		('Delta West', 'Giza'),
		('Delta East', 'Giza')
		)
    AS region_mapping(region, fallback_region)
),
final_mp as (
select region,product_id,
min(final_min_price) as final_min_price,
min(final_max_price) as final_max_price,
min(final_mod_price) as final_mod_price,
min(final_true_min) as final_true_min,
min(final_true_max) as final_true_max

from (
SELECT
distinct 
	w.region,
	w.product_id,
    COALESCE(m1.min_price, m2.min_price) AS final_min_price,
    COALESCE(m1.max_price, m2.max_price) AS final_max_price,
    COALESCE(m1.mod_price, m2.mod_price) AS final_mod_price,
	COALESCE(m1.true_min, m2.true_min) AS final_true_min,
	COALESCE(m1.true_max, m2.true_max) AS final_true_max,
FROM full_data w
LEFT JOIN MP m1
    ON w.region = m1.region and w.product_id = m1.product_id
JOIN region_mapping rm
    ON w.region = rm.region
LEFT JOIN MP m2
    ON rm.fallback_region = m2.region
   AND w.product_id = m2.product_id
)
where final_min_price is not null 
group by all 
),
ben_soliman as (
select z.* 
from (
select maxab_product_id as product_id,maxab_sku as sku,avg(bs_final_price) as ben_soliman_price
from (
select * , row_number()over(partition by maxab_product_id order by diff) as rnk_2
from (
select *,(bs_final_price-wac_p)/wac_p as diff_2
from (
select * ,bs_price/maxab_basic_unit_count as bs_final_price
from (
select *,row_number()over(partition by maxab_product_id,maxab_pu order by diff) as rnk 
from (
select sm.* ,max(INJECTION_DATE::date)over(partition by maxab_product_id,maxab_pu) as max_date,wac1,wac_p,abs(bs_price-(wac_p*maxab_basic_unit_count))/(wac_p*maxab_basic_unit_count) as diff 
from materialized_views.savvy_mapping sm 
join finance.all_cogs f on f.product_id = sm.maxab_product_id and current_timestamp between f.from_Date and f.to_date
where bs_price is not null 
and INJECTION_DATE::date >= CURRENT_DATE- 5
qualify INJECTION_DATE::date = max_date
)
qualify rnk = 1 
)
)
where diff_2 between -0.5 and 0.5 
)
qualify rnk_2 = 1 
)
group by all
)z 
join finance.all_cogs f on f.product_id = z.product_id and current_timestamp between f.from_Date and f.to_date

where ben_soliman_price between f.wac_p*0.7 and f.wac_p*1.3
),
scrapped_data as (
select product_id,cat,brand,region,max_date,min(MARKET_PRICE) as min_scrapped,max(MARKET_PRICE) as max_scrapped,median(MARKET_PRICE) as median_scrapped
from (
select MATERIALIZED_VIEWS.CLEANED_MARKET_PRICES.*,max(date)over(partition by region,MATERIALIZED_VIEWS.CLEANED_MARKET_PRICES.product_id,competitor) as max_date
from MATERIALIZED_VIEWS.CLEANED_MARKET_PRICES
join finance.all_cogs f on f.product_id = MATERIALIZED_VIEWS.CLEANED_MARKET_PRICES.product_id and CURRENT_TIMESTAMP between f.from_date and f.to_date 
where date>= current_date -5
and MARKET_PRICE between f.wac_p * 0.7 and wac_p*1.3
qualify date = max_date 
)
group by all 
),
local_prices as (
SELECT  case when cpu.cohort_id in (700) then 'Cairo'
             when cpu.cohort_id in (701) then 'Giza'
             when cpu.cohort_id in (704) then 'Delta East'
             when cpu.cohort_id in (703) then 'Delta West'
             when cpu.cohort_id in (1123,1124,1125,1126) then 'Upper Egypt'
             when cpu.cohort_id in (702) then 'Alexandria'
        end as region,
		cohort_id,
        pu.product_id,
		pu.packing_unit_id as packing_unit_id,
		pu.basic_unit_count,
        avg(cpu.price) as price
FROM    cohort_product_packing_units cpu
join    PACKING_UNIT_PRODUCTS pu on pu.id = cpu.product_packing_unit_id
WHERE   cpu.cohort_id in (700,701,702,703,704,1123,1124,1125,1126)
    and cpu.created_at::date<>'2023-07-31'
    and cpu.is_customized = true
	group by all 
),
live_prices as (
select region,cohort_id,product_id,pu_id as packing_unit_id,buc as basic_unit_count,NEW_PRICE as price
from materialized_views.DBDP_PRICES
where created_at = Current_timestamp::date
and DATE_PART('hour', Current_timestamp::time) BETWEEN SPLIT_PART(time_slot, '-', 1)::int AND SPLIT_PART(time_slot, '-', 2)::int
and cohort_id in (700,701,702,703,704,696,695,698,697,699,1123,1124,1125,1126)
),
prices as (
select *
from (
    SELECT *, 1 AS priority FROM live_prices
    UNION ALL
    SELECT *, 2 AS priority FROM local_prices
)
QUALIFY ROW_NUMBER() OVER (PARTITION BY region,cohort_id,product_id,packing_unit_id ORDER BY priority) = 1
),

maxab_prices as (
select region,cohort_id,product_id,price 
from prices 
where basic_unit_count = 1 
),
sales as (
SELECT  DISTINCT
		cpc.cohort_id,
		pso.product_id,
        sum(pso.total_price) as nmv
FROM product_sales_order pso
JOIN sales_orders so ON so.id = pso.sales_order_id   
join COHORT_PRICING_CHANGES cpc on cpc.id = pso.cohort_pricing_change_id
WHERE   True
    AND so.created_at::date between date_trunc('month', Current_timestamp::date - 120) and Current_timestamp::date - 1
    AND so.sales_order_status_id not in (7,12)
    AND so.channel IN ('telesales','retailer')
    AND pso.purchased_item_count <> 0
GROUP BY ALL
),
margin_change as (
select product_id,cohort_id,(0.6*product_std) +(0.3*brand_std) + (0.1*cat_std) as std,avg_margin
from (
select product_id,cohort_id,stddev(product_margin) as product_std , stddev(brand_margin) as brand_std,stddev(cat_margin) as cat_std,avg(product_margin) as avg_margin
from (
select distinct product_id,order_date,cohort_id,(nmv-cogs_p)/nmv as product_margin,(brand_nmv-brand_cogs)/brand_nmv as brand_margin,(cat_nmv-cat_cogs)/cat_nmv as cat_margin
from(
SELECT  DISTINCT
		so.created_at::date as order_date,
		cpc.cohort_id,
		pso.product_id,
		brands.name_ar as brand, 
		categories.name_ar as cat,
       sum(COALESCE(f.wac_p,0) * pso.purchased_item_count * pso.basic_unit_count) as cogs_p,
    	sum(pso.total_price) as nmv,
		sum(nmv) over(partition by order_date,cat,brand) as brand_nmv,
		sum(cogs_p) over(partition by order_date,cat,brand) as brand_cogs,
		sum(nmv) over(partition by order_date,cat) as cat_nmv,
		sum(cogs_p) over(partition by order_date,cat) as cat_cogs
FROM product_sales_order pso
JOIN sales_orders so ON so.id = pso.sales_order_id   
join COHORT_PRICING_CHANGES cpc on cpc.id = pso.cohort_pricing_change_id
JOIN products on products.id=pso.product_id
JOIN brands on products.brand_id = brands.id 
JOIN categories ON products.category_id = categories.id
JOIN finance.all_cogs f  ON f.product_id = pso.product_id
                        AND f.from_date::date <= so.created_at::date
                        AND f.to_date::date > so.created_at::date
						
WHERE  so.created_at::date between date_trunc('month', Current_timestamp::date - 120) and Current_timestamp::date
    AND so.sales_order_status_id not in (7,12)
    AND so.channel IN ('telesales','retailer')
    AND pso.purchased_item_count <> 0
GROUP BY ALL
)
)

group by all 
)
),
cat_brand_target as (
SELECT DISTINCT cat, brand, margin as target_bm
FROM    performance.commercial_targets cplan
QUALIFY CASE WHEN DATE_TRUNC('month', MAX(DATE)OVER()) = DATE_TRUNC('month', Current_timestamp::date) THEN DATE_TRUNC('month', Current_timestamp::date)
ELSE DATE_TRUNC('month', Current_timestamp::date - INTERVAL '1 month') END = DATE_TRUNC('month', date)
),
cat_target as (

select cat,sum(target_bm *(target_nmv/cat_total)) as cat_target_margin
from (
select *,sum(target_nmv)over(partition by cat) as cat_total
from (
select cat,brand,avg(target_bm) as target_bm , sum(target_nmv) as target_nmv
from (
SELECT DISTINCT date,city as region,cat, brand, margin as target_bm,nmv as target_nmv
FROM    performance.commercial_targets cplan
QUALIFY CASE WHEN DATE_TRUNC('month', MAX(DATE)OVER()) = DATE_TRUNC('month', Current_timestamp::date) THEN DATE_TRUNC('month', Current_timestamp::date)
ELSE DATE_TRUNC('month', Current_timestamp::date - INTERVAL '1 month') END = DATE_TRUNC('month', date)
)
group by all
)
)
group by all 
)


select distinct maxab.cohort_id,
maxab.product_id,
CONCAT(products.name_ar,' ',products.size,' ',product_units.name_ar) as sku,
brands.name_ar as brand, 
categories.name_ar as cat,
sections.name_ar as section_name,
maxab.price as maxab_price,
bs.ben_soliman_price,
final_min_price,
final_max_price,
final_mod_price,
min_scrapped,
median_scrapped,
max_scrapped,
wac_p,
coalesce(nmv,0) as nmv,
coalesce(mc.std,0.01) as std,
coalesce(coalesce(cbt.target_bm , ct.cat_target_margin),0) as target_margin,
coalesce(avg_margin,0) as avg_margin

from maxab_prices maxab
left join ben_soliman bs on bs.product_id = maxab.product_id
left join final_mp fmp on fmp.product_id = maxab.product_id and fmp.region = maxab.region
left join sales s on s.product_id = maxab.product_id and s.cohort_id = maxab.cohort_id
left join scrapped_data  sd on sd.product_id = maxab.product_id and sd.region = maxab.region
join finance.all_cogs f on f.product_id = maxab.product_id and  CURRENT_TIMESTAMP between f.from_date and f.to_date
JOIN products on products.id=maxab.product_id
JOIN brands on products.brand_id = brands.id 
JOIN categories ON products.category_id = categories.id
JOIN sections ON sections.id = categories.section_id
JOIN product_units ON product_units.id = products.unit_id 
left join margin_change mc on mc.product_id = maxab.product_id and mc.cohort_id = maxab.cohort_id
left join cat_brand_target cbt on cbt.brand = brands.name_ar and cbt.cat = categories.name_ar 
left join cat_target ct on ct.cat = categories.name_ar 
'''
# Column labels for the main market query, in the order of its SELECT list.
market_columns = ['cohort_id','product_id','sku','brand','cat','section_name','maxab_price','ben_soliman_price','final_min_price','final_max_price','final_mod_price','min_scrapped','median_scrapped','max_scrapped','wac_p','nmv','std','target_margin','avg_margin']
market_main_data = query_snowflake(query, columns=market_columns)
for col in market_main_data.columns:
    # Same result as the deprecated pd.to_numeric(..., errors='ignore'):
    # convert when possible, otherwise leave the column (e.g. sku, brand) untouched.
    try:
        market_main_data[col] = pd.to_numeric(market_main_data[col])
    except (ValueError, TypeError):
        pass
# Keep a single row per cohort/product.
market_main_data = market_main_data.drop_duplicates(subset=['cohort_id','product_id'])

In [9]:
# Commercial group of each SKU, read through the setup_environment_2.dwh_pg_query helper.
query = '''
select * 
from materialized_views.sku_commercial_groups
'''
groups = setup_environment_2.dwh_pg_query(query, columns=['product_id','group'])
groups.columns = groups.columns.str.lower()
for col in groups.columns:
    # Same result as the deprecated pd.to_numeric(..., errors='ignore').
    try:
        groups[col] = pd.to_numeric(groups[col])
    except (ValueError, TypeError):
        pass

In [10]:
# Price-up records per region/product.
query ='''
select region , product_id,new_pp,forecasted_date
from materialized_views.DBDP_PRICE_UPS
'''
# NOTE(review): the last label is spelled 'forcasted_date' while the SQL column is
# 'forecasted_date'; it is kept as-is because later cells may rely on this spelling.
price_ups = query_snowflake(query, columns=['region','product_id','new_pp','forcasted_date'])
price_ups.columns = price_ups.columns.str.lower()
for col in price_ups.columns:
    # Same result as the deprecated pd.to_numeric(..., errors='ignore').
    try:
        price_ups[col] = pd.to_numeric(price_ups[col])
    except (ValueError, TypeError):
        pass

In [11]:
# NMV per cohort/product over the last 120 days (up to yesterday) for the priced cohorts.
query = '''
SELECT  DISTINCT
		cpc.cohort_id,  
		pso.product_id,
		CONCAT(products.name_ar,' ',products.size,' ',product_units.name_ar) as sku,
		brands.name_ar as brand, 
		categories.name_ar as cat,
        sum(pso.total_price) as nmv


FROM product_sales_order pso
JOIN sales_orders so ON so.id = pso.sales_order_id
join COHORT_PRICING_CHANGES cpc on cpc.id = pso.COHORT_PRICING_CHANGE_id
JOIN products on products.id=pso.product_id
JOIN brands on products.brand_id = brands.id 
JOIN categories ON products.category_id = categories.id
JOIN product_units ON product_units.id = products.unit_id 
          

WHERE   True
    AND so.created_at::date between  current_date - 120 and current_date -1 
    AND so.sales_order_status_id not in (7,12)
    AND so.channel IN ('telesales','retailer')
    AND pso.purchased_item_count <> 0
    and cpc.cohort_id in (700,701,702,703,704,1123,1124,1125,1126)

GROUP BY ALL
'''
sales = query_snowflake(query, columns=['cohort_id','product_id','sku','brand','cat','nmv'])
sales.columns = sales.columns.str.lower()
for col in sales.columns:
    # Same result as the deprecated pd.to_numeric(..., errors='ignore').
    try:
        sales[col] = pd.to_numeric(sales[col])
    except (ValueError, TypeError):
        pass

In [12]:
def assign_tier(cumulative_contribution):
    """Map a cumulative NMV share to a tier from 1 (top sellers) to 5 (long tail).

    Upper bounds are inclusive: <=0.4 -> 1, <=0.6 -> 2, <=0.8 -> 3, <=0.95 -> 4.
    Anything else (including NaN, which fails every comparison) -> 5.
    """
    upper_bounds = (0.4, 0.6, 0.8, 0.95)
    for tier, bound in enumerate(upper_bounds, start=1):
        if cumulative_contribution <= bound:
            return tier
    return 5
    

In [13]:
# Manual tier overrides, applied in the cell that adjusts `sales['tier']`:
# brands forced to tier 0
below_market = ['شويبس','كوكا كولا']
# brands forced to tier 1
min_brands = ['فاميليا','مولبد','مولفيكس','اوكسي','جوي','ريفولي','البوادي','هارفست فوودز','هاينز','بيبسي']
# brands forced to tier 3
avg_brands = ['بخيره','جود كير']
# brands forced to tier 5
max_brands =['فيوري']
# categories whose SKUs move one tier lower (never below tier 1)
min_cats = ['تونة و سمك']

In [14]:
# Rank SKUs inside each cohort by NMV and tier them on their cumulative NMV share.
# The string alias 'sum' replaces the builtin `sum`, which pandas deprecates as a transform argument.
sales['total_nmv'] = sales.groupby('cohort_id')['nmv'].transform('sum')
sales['cntrb_nmv'] = sales['nmv'] / sales['total_nmv']
# Sort descending by NMV so the cumulative share starts with the best sellers.
sales = sales.sort_values(['cohort_id', 'nmv'], ascending=[True, False])
sales['nmv_cumulative_cntrb'] = sales.groupby('cohort_id')['cntrb_nmv'].cumsum()
sales['tier'] = sales['nmv_cumulative_cntrb'].apply(assign_tier)

In [15]:
# Apply the manual overrides on top of the NMV-based tier. Order matters: later lines win,
# so a brand-level override replaces any category/campaign adjustment made just before.
# Categories in min_cats and campaign brands in blue_list each drop one tier (not below 1).
sales.loc[sales['cat'].isin(min_cats),'tier']=np.maximum(sales['tier']-1,1)
sales.loc[sales['brand'].isin(blue_list),'tier']=np.maximum(sales['tier']-1,1)
# Fixed tiers per brand list.
sales.loc[sales['brand'].isin(min_brands),'tier']=1
sales.loc[sales['brand'].isin(below_market),'tier']=0
sales.loc[sales['brand'].isin(avg_brands),'tier']=3
sales.loc[sales['brand'].isin(max_brands),'tier']=5

In [17]:
# In a 'min' run every tier moves one level down, floored at 0.
# NOTE: this cell is not idempotent; re-running it shifts the tiers down again.
if 'min' in status:
    # The original first copied `tier` into `new_tier` and immediately overwrote it;
    # that dead assignment is removed.
    sales['new_tier'] = np.maximum(sales['tier'] - 1, 0)
    sales['tier'] = sales['new_tier']

In [18]:
# Fill missing price references of a SKU with the median of its commercial group
# inside the same cohort.
market_data = market_main_data.merge(groups, on='product_id', how='left')

cols = ['ben_soliman_price','final_min_price','final_max_price','final_mod_price','min_scrapped','median_scrapped','max_scrapped']

# Median of every price reference per (group, cohort), using grouped SKUs only.
# The original also wrote 'group_nmv' and 'cntrb' columns on a filtered slice, but the
# aggregation discarded them right away; that dead computation is removed.
# NOTE(review): those columns suggest an NMV-weighted aggregation may have been intended — confirm.
groups_data = (
    market_data[market_data['group'].notna()]
    .groupby(['group','cohort_id'])
    .agg({c: 'median' for c in cols})
    .reset_index()
)

merged = market_data.merge(
    groups_data,
    on=['group','cohort_id'],
    how='left',
    suffixes=('', '_group')
)
for col in cols:
    # The SKU's own value wins; the group median is only a fallback.
    merged[col] = merged[col].fillna(merged[col + '_group'])

merged = merged.drop(columns=[c + '_group' for c in cols])
market_data = merged.copy()

In [19]:
# Exclusion flag. With the rule below commented out, every row keeps remove == 0,
# so the filter on the last line currently drops nothing.
market_data['remove'] = 0
# Disabled rule: flag rows without a Ben Soliman price whose marketplace (or scraped) prices are all identical while the other source is missing.
#market_data.loc[(market_data['ben_soliman_price'].isna())&(((market_data['final_min_price'] == market_data['final_mod_price']) & (market_data['final_max_price'] == market_data['final_mod_price'])&(market_data['min_scrapped'].isna()))|   ((market_data['min_scrapped'] == market_data['median_scrapped']) & (market_data['median_scrapped'] == market_data['max_scrapped'])&(market_data['final_min_price'].isna()))),'remove'] = 1 
market_data = market_data[market_data['remove']==0]

In [20]:
def price_analysis(r):
    """Summarise the usable market prices of one row.

    Collects the seven price references of the row, drops zeros and missing
    values, keeps only prices that are at least `wac_p` and whose implied margin
    lies between (avg_margin - 2.5*std) and (avg_margin + 4*std), and returns
    (minimum, 25th, 50th, 75th percentile, maximum) of what is left.
    `target_margin` replaces `avg_margin` when the latter is below 0.01.
    Returns five NaNs when no price survives.
    """
    current_price = r['maxab_price']  # read as in the original; not used in the calculation
    cost = r['wac_p']
    margin = r['avg_margin']
    spread = r['std']
    fallback_margin = r['target_margin']

    source_columns = (
        'ben_soliman_price',
        'final_min_price',
        'final_mod_price',
        'final_max_price',
        'min_scrapped',
        'median_scrapped',
        'max_scrapped',
    )
    raw_prices = [r[name] for name in source_columns]

    if margin < 0.01:
        margin = fallback_margin

    # Distinct, non-zero, non-missing prices.
    distinct_prices = set()
    for value in raw_prices:
        if value in [0, np.nan] or pd.isna(value):
            continue
        distinct_prices.add(value)

    # Bounds are evaluated per candidate (as in the original), so nothing is computed
    # when there is no candidate.
    in_range = set()
    for value in distinct_prices:
        if not (value >= cost / (1 - (margin - (2.5 * spread)))):
            continue
        if not (value <= cost / (1 - (margin + (4 * spread)))):
            continue
        if not (value >= cost):
            continue
        in_range.add(value)

    ordered = sorted(in_range)
    if len(ordered) == 0:
        return np.nan, np.nan, np.nan, np.nan, np.nan

    return (
        np.min(ordered),
        np.percentile(ordered, 25),
        np.percentile(ordered, 50),
        np.percentile(ordered, 75),
        np.max(ordered),
    )

# Compute the five price statistics for every row and drop rows where no market price
# survived the filters inside price_analysis (all five values are NaN in that case).
market_data[['minimum','percentile_25','percentile_50','percentile_75','maximum']] = market_data.apply(price_analysis, axis=1, result_type='expand')
market_data = market_data[~market_data['minimum'].isna()]

In [21]:
def step_min_max(x):
    """Extend the market price range by one typical step on each side.

    The typical step is the mean gap between consecutive price statistics
    (minimum, 25th, 50th, 75th percentile, maximum), counting only gaps whose
    size relative to `wac_p` is at most 1.2 * std. If no gap qualifies, or the
    mean gap is zero, the fallback step min(2*std, 0.2*target_margin) is used.

    Returns
    -------
    (new_min, new_max)
        `minimum - step` and `maximum + step`; each falls back to the
        unextended value when the extended one would be below `wac_p`.
    """
    wac = x['wac_p']
    std = x['std']
    minimum = x['minimum']
    maximum = x['maximum']
    levels = [x['minimum'], x['percentile_25'], x['percentile_50'], x['percentile_75'], x['maximum']]

    small_steps = [
        upper - lower
        for lower, upper in zip(levels, levels[1:])
        if (upper - lower) / wac <= std * 1.2
    ]

    # np.mean([]) is NaN, which used to bypass the fallback below and turn both
    # results into NaN; an empty list is now treated like a zero step.
    avg_step = np.mean(small_steps) if small_steps else 0
    if avg_step == 0:
        # NOTE(review): this fallback is expressed in margin units while the prices are in
        # currency units — confirm whether it should be scaled by wac.
        avg_step = np.minimum(2 * std, 0.2 * x['target_margin'])

    new_min = minimum - avg_step
    new_max = maximum + avg_step
    if new_min < wac:
        new_min = minimum
    # NOTE(review): new_max can only be below wac when maximum is; kept as in the original.
    if new_max < wac:
        new_max = maximum

    return new_min, new_max
        
# Add the extended price range: 'below_market' (one step under the market minimum) and
# 'above_market' (one step over the market maximum), both still expressed as prices here.
market_data[['below_market','above_market']] = market_data.apply(step_min_max, axis=1, result_type='expand')

In [22]:
# Convert every price level into the margin it implies over cost: (price - wac_p) / price.
market_data = market_data[['cohort_id', 'product_id','maxab_price','wac_p', 'minimum', 'percentile_25', 'percentile_50', 'percentile_75', 'maximum','below_market','above_market']]

# target margin column -> source price column (below_market / above_market are overwritten in place)
margin_from_price = {
    'below_market': 'below_market',
    'market_min': 'minimum',
    'market_25': 'percentile_25',
    'market_50': 'percentile_50',
    'market_75': 'percentile_75',
    'market_max': 'maximum',
    'above_market': 'above_market',
    'current_margin': 'maxab_price',
}
for margin_col, price_col in margin_from_price.items():
    market_data[margin_col] = (market_data[price_col] - market_data['wac_p']) / market_data[price_col]

market_data = market_data[['cohort_id', 'product_id','current_margin','below_market' ,'market_min', 'market_25','market_50', 'market_75', 'market_max','above_market']]

In [23]:
# SKUs that already have a margin range in the sheet (min_max_df) AND market data AND a sales tier.
found = min_max_df.merge(market_data,on=['cohort_id','product_id'])
found = found.merge(sales[['cohort_id','product_id','tier']],on=['cohort_id','product_id'])

# Tier -> market margin level. The minimum uses the level matching the tier
# (tier 0 = below market ... tier 5 = market max); the maximum uses the next level up,
# with 1.2 x market max for tier 5.
cond = [found['tier']==0,found['tier']==1,found['tier']==2,found['tier']==3,found['tier']==4,found['tier']==5] 
cho = [found['below_market'],found['market_min'],found['market_25'],found['market_50'],found['market_75'],found['market_max']]
cho2 = [found['market_min'],found['market_25'],found['market_50'],found['market_75'],found['market_max'],found['market_max']*1.2]

# Rows whose tier matches none of the conditions fall back to market_min for both values.
found['selected_min'] = np.select(cond,cho,default =found['market_min'] )
found['selected_max'] = np.select(cond,cho2,default =found['market_min'] )

# Relative distance of the selected minimum from the current margin and from the sheet minimum.
found['min_cu_diff'] = (found['selected_min']-found['current_margin'])/found['current_margin']
found['min_min_diff'] = (found['selected_min']-found['min_margin'])/found['min_margin']

# Keep a row when the selected minimum is within +/-55% of either reference.
found = found[((found['min_cu_diff'] <= 0.55) & (found['min_cu_diff'] >= -0.55))|((found['min_min_diff'] <= 0.55) & (found['min_min_diff'] >= -0.55))]

# Relative width of the range currently defined in the sheet.
found['diff'] = (found['max_margin'] - found['min_margin'])/found['min_margin']

found['new_min'] = found['selected_min']
# New maximum: the larger of (sheet width applied to the new minimum) and the selected maximum,
# clamped to lie between new_min + 0.01 and new_min + 0.04.
found['new_max'] = np.minimum(np.maximum(np.maximum((found['diff']+1)*found['selected_min'],found['selected_max']),found['selected_min']+0.01),found['selected_min']+0.04)
found=found[['cohort_id','product_id','new_min','new_max']]
found['type'] = 'both'

In [24]:
# SKUs with market data and a sales tier but NO row in min_max_df (anti-join via the 'flag' marker).
min_max_df['flag']=1
not_found = market_data.merge(min_max_df[['cohort_id','product_id','flag']],on=['cohort_id','product_id'],how='left')
not_found = not_found.merge(sales[['cohort_id','product_id','tier']],on=['cohort_id','product_id'])
# flag is NaN exactly for the rows that had no match in min_max_df.
not_found = not_found[not_found['flag'].isna()]
# Same tier -> market margin level mapping as used for the SKUs found in the sheet.
cond = [not_found['tier']==0,not_found['tier']==1,not_found['tier']==2,not_found['tier']==3,not_found['tier']==4,not_found['tier']==5] 
cho = [not_found['below_market'],not_found['market_min'],not_found['market_25'],not_found['market_50'],not_found['market_75'],not_found['market_max']]
cho2 = [not_found['market_min'],not_found['market_25'],not_found['market_50'],not_found['market_75'],not_found['market_max'],not_found['market_max']*1.2]
not_found['selected_min'] = np.select(cond,cho,default =not_found['market_min'] )
not_found['selected_max'] = np.select(cond,cho2,default =not_found['market_min'] )
# Only the current margin is available as a reference here; the tolerance is +/-200%.
not_found['min_cu_diff'] = (not_found['selected_min']-not_found['current_margin'])/not_found['current_margin']
not_found = not_found[((not_found['min_cu_diff'] <= 2) & (not_found['min_cu_diff'] >= -2))]
not_found['new_min'] = not_found['selected_min']
# New maximum: the selected maximum clamped to lie between new_min + 0.01 and new_min + 0.04.
not_found['new_max'] = np.minimum(np.maximum(not_found['selected_max'],not_found['selected_min']+0.01),not_found['selected_min']+0.04)
not_found=not_found[['cohort_id','product_id','new_min','new_max']]
not_found['type'] = 'MP_only'

In [25]:
# Stack both result sets ('both' and 'MP_only') and drop exact duplicate rows.
final_df = pd.concat([found, not_found], axis=0).drop_duplicates()
final_df

Unnamed: 0,cohort_id,product_id,new_min,new_max,type
0,700,3,0.051860,0.081171,both
1,701,3,0.041931,0.066391,both
2,702,3,0.043734,0.059734,both
3,703,3,0.050679,0.082501,both
4,704,3,0.047987,0.070307,both
...,...,...,...,...,...
10636,1123,2109,0.050446,0.060446,MP_only
10665,701,23380,0.058091,0.068091,MP_only
10744,703,10596,0.050042,0.060042,MP_only
10771,1123,7182,0.044240,0.056313,MP_only


In [26]:
# Cohort -> region lookup. The inner merge below keeps only the cohorts listed here.
region_rows = [
    ('Cairo', 700),
    ('Giza', 701),
    ('Delta West', 703),
    ('Delta East', 704),
    ('Upper Egypt', 1124),
    ('Upper Egypt', 1126),
    ('Upper Egypt', 1123),
    ('Upper Egypt', 1125),
    ('Alexandria', 702),
]
regions = pd.DataFrame(region_rows, columns=['region', 'cohort_id'])

final_df = (
    final_df.merge(regions, on=['cohort_id'])
    [['cohort_id','product_id','new_min','new_max','type']]
    .drop_duplicates()
    # Rename to the column names used by the margin sheet.
    .rename(columns={'new_min': 'min_margin', 'new_max': 'max_margin'})
)
#final_df = pd.concat([final_df,main_found[['cohort_id','product_id','min_margin','max_margin','type']]],axis=0)
final_df

Unnamed: 0,cohort_id,product_id,min_margin,max_margin,type
0,700,3,0.051860,0.081171,both
1,701,3,0.041931,0.066391,both
2,702,3,0.043734,0.059734,both
3,703,3,0.050679,0.082501,both
4,704,3,0.047987,0.070307,both
...,...,...,...,...,...
10621,1123,2109,0.050446,0.060446,MP_only
10622,701,23380,0.058091,0.068091,MP_only
10623,703,10596,0.050042,0.060042,MP_only
10624,1123,7182,0.044240,0.056313,MP_only


In [27]:
from datetime import datetime, timedelta


def iso_week_labels(today):
    """Return the ISO week numbers, as strings, of `today`, one week earlier and two weeks earlier.

    Earlier weeks are found by stepping back in calendar time rather than subtracting from
    the week number, so the result stays valid across a year boundary (week 1 is preceded
    by week 52 or 53, not by week 0).
    """
    return tuple(str((today - timedelta(weeks=back)).isocalendar()[1]) for back in range(3))


week_number_c, week_number_p, week_number_p2 = iso_week_labels(datetime.now())
week_number = int(week_number_c)
print(week_number_c,week_number_p,week_number_p2)

49 48 47


In [28]:
# Load the confirmed rows of the most recent weekly tab of the 'Egypt SKUs Aging Monitor'
# workbook and reshape them to one row per (warehouse, product).
scope = ["https://spreadsheets.google.com/feeds",
         'https://www.googleapis.com/auth/spreadsheets',
         "https://www.googleapis.com/auth/drive.file",
         "https://www.googleapis.com/auth/drive"]
creds = ServiceAccountCredentials.from_json_keyfile_dict(json.loads(setup_environment_2.get_secret("prod/maxab-sheets")), scope)
client = gspread.authorize(creds)
aging_book = client.open('Egypt SKUs Aging Monitor')
worksheet_names = [ws.title for ws in aging_book.worksheets()]

# Pick the tab by explicit priority: current week, then previous week, then two weeks back.
# The original loop let whichever of the two older weeks appeared later in the tab order
# win, so the result depended on how the tabs were arranged.
# NOTE(review): matching is by substring, so a label such as '4' also matches a tab
# containing '49'; confirm the tab naming scheme.
sheet_name = ""
for week_label in (week_number_c, week_number_p, week_number_p2):
    matching_names = [name for name in worksheet_names if week_label in name]
    if matching_names:
        sheet_name = matching_names[0]
        break
if not sheet_name:
    raise ValueError(
        f"No worksheet found for weeks {week_number_c}, {week_number_p} or {week_number_p2}"
    )
tgto = aging_book.worksheet(sheet_name)

data = tgto.get_all_values()

# Convert to DataFrame: the second sheet row is used as the header, data starts on the third,
# and only the first 21 columns are kept.
if data:
    tgto_df = pd.DataFrame(data[2:], columns=data[1])
    tgto_df = tgto_df.iloc[:, :21]
    print("ok")
else:
    tgto_df = pd.DataFrame()
for col in tgto_df.columns:
    # Same result as the deprecated pd.to_numeric(..., errors='ignore').
    try:
        tgto_df[col] = pd.to_numeric(tgto_df[col])
    except (ValueError, TypeError):
        pass
tgto_df = tgto_df[tgto_df['Fulfillment confirmation']=='confirmed']


tgto_df=tgto_df[['SKU', 'Sharqya', 'Khorshed Alex', 'Bani sweif',
       'Mostorod', 'Barageel', 'El-Mahala', 'Sohag', 'Mansoura FC',
       'Assiut FC', 'Menya Samalot', 'Tanta']]


def convert_string(x):
    """Return the row's SKU as an int when it is a numeric string (thousands commas removed).

    Values that are not strings, or strings that are not integers, are returned
    unconverted (a string keeps its commas stripped, as before).
    """
    id_ = x.SKU
    try:
        id_ = id_.replace(",", "")
        id_ = int(id_)
    except (AttributeError, ValueError, TypeError):
        # Already numeric (no .replace) or not an integer string: keep the value.
        pass
    return id_


# One row per (SKU, warehouse); the warehouse columns hold the stock figures.
df_long = tgto_df.melt(
    id_vars=['SKU'],
    var_name='warehouse',
    value_name='stocks'
)
df_long['product_id'] = df_long.apply(convert_string,axis=1)
df_long = df_long.drop(columns = 'SKU')
df_long = df_long[~df_long['stocks'].isna()]
df_long

ok


Unnamed: 0,warehouse,stocks,product_id
18,Sharqya,24.0,12345
20,Sharqya,27.0,1064
21,Sharqya,7.0,8638
29,Sharqya,37.0,2270
36,Sharqya,11.0,12858
...,...,...,...
2112,Tanta,1.0,8486
2114,Tanta,8.0,9620
2125,Tanta,6.0,11295
2131,Tanta,5.0,8935


In [29]:
# Current cost (wac_p) per product: the cost row whose validity window contains "now",
# with the session time converted from the Snowflake session zone to Africa/Cairo.
query = f'''
select product_id,wac_p
from finance.all_cogs f 
where CONVERT_TIMEZONE('{zone_to_use}', 'Africa/Cairo', CURRENT_TIMEstamp()) between f.from_date and f.to_date 
'''
wacs = query_snowflake(query, columns=['product_id','wac_p'])
wacs.columns = wacs.columns.str.lower()
for col in wacs.columns:
    # Same result as the deprecated pd.to_numeric(..., errors='ignore').
    try:
        wacs[col] = pd.to_numeric(wacs[col])
    except (ValueError, TypeError):
        pass

In [30]:
# Warehouse lookup: region, warehouse name, warehouse id and the pricing cohort it belongs to.
whs_columns = ['region', 'warehouse', 'warehouse_id', 'cohort_id']
whs_rows = [
    ('Cairo', 'El-Marg', 38, 700),
    ('Cairo', 'Mostorod', 1, 700),
    ('Giza', 'Barageel', 236, 701),
    ('Delta West', 'El-Mahala', 337, 703),
    ('Delta West', 'Tanta', 8, 703),
    ('Delta East', 'Mansoura FC', 339, 704),
    ('Delta East', 'Sharqya', 170, 704),
    ('Upper Egypt', 'Assiut FC', 501, 1124),
    ('Upper Egypt', 'Bani sweif', 401, 1126),
    ('Upper Egypt', 'Menya Samalot', 703, 1123),
    ('Upper Egypt', 'Sohag', 632, 1125),
    ('Alexandria', 'Khorshed Alex', 797, 702),
    ('Giza', 'Sakkarah', 962, 701),
]
whs = pd.DataFrame.from_records(whs_rows, columns=whs_columns)

In [31]:
# Current available stock per warehouse/product for active basic-unit rows,
# excluding warehouses 6, 9 and 10.
query = '''
SELECT DISTINCT product_warehouse.warehouse_id,
                product_warehouse.product_id,
                (product_warehouse.available_stock)::integer as stocks,
        from  product_warehouse 
        JOIN products on product_warehouse.product_id = products.id
        JOIN product_units ON products.unit_id = product_units.id
        where   product_warehouse.warehouse_id not in (6,9,10)
            AND product_warehouse.activation = 'true'
            AND product_warehouse.is_basic_unit = 1
'''
# The stock column is labelled 'cu_stocks' so it does not clash with the 'stocks'
# column coming from the aging sheet.
stocks = query_snowflake(query, columns=['warehouse_id','product_id','cu_stocks'])
stocks.columns = stocks.columns.str.lower()
for col in stocks.columns:
    # Same result as the deprecated pd.to_numeric(..., errors='ignore').
    try:
        stocks[col] = pd.to_numeric(stocks[col])
    except (ValueError, TypeError):
        pass

In [32]:
# Latest product statistics per region/product (most recent created_at since the start
# of the month 60 days back).
query = f'''
select region,product_id,optimal_bm,MIN_BOUNDARY,MAX_BOUNDARY,MEDIAN_BM
from (
select region,product_id,target_bm,optimal_bm,MIN_BOUNDARY,MAX_BOUNDARY,MEDIAN_BM,max(created_at) over(partition by product_id,region) as max_date,created_at
from materialized_views.PRODUCT_STATISTICS
where created_at::date >= date_trunc('month',current_date - 60)
qualify max_date = created_at
)

'''

stats = query_snowflake(query, columns=['region','product_id','optimal_bm','MIN_BOUNDARY','MAX_BOUNDARY','MEDIAN_BM'])
# Lower-case the labels so later cells can use 'min_boundary' / 'optimal_bm'.
stats.columns = stats.columns.str.lower()
for col in stats.columns:
    # Same result as the deprecated pd.to_numeric(..., errors='ignore').
    try:
        stats[col] = pd.to_numeric(stats[col])
    except (ValueError, TypeError):
        pass

In [33]:
# (product_id, warehouse_id) pairs to exclude from the TGTG selection.
# The TGTG cell left-joins this frame and keeps only rows where `remove` is
# NaN, so `to_remove` must exist on a fresh kernel. The definition was
# commented out while its saved output was still displayed; it is restored
# here so Restart & Run All reproduces that output.
# NOTE(review): if the exclusion of product 8673 in warehouse 401 is no longer
# wanted, empty the list instead of commenting the cell out.
to_remove = pd.DataFrame(
    [
        (8673, 401),
    ],
    columns=['product_id', 'warehouse_id'],
)
to_remove['remove'] = 1  # flag column tested with .isna() after the left join
to_remove

Unnamed: 0,product_id,warehouse_id,remove
0,8673,401,1


In [35]:
# Build the TGTG margin table: one (cohort_id, product_id) row with
# min_margin == max_margin.
# Inputs defined elsewhere in the notebook: df_long, wacs, whs, to_remove,
# stocks, market_data, stats, market_main_data.

# Attach wac_p and warehouse metadata, then drop the explicitly excluded
# (product_id, warehouse_id) pairs (rows that matched `to_remove`).
tgtg = (
    df_long
    .merge(wacs, on='product_id')
    .merge(whs, on='warehouse')
    .merge(to_remove, on=['product_id', 'warehouse_id'], how='left')
)
tgtg = tgtg[tgtg['remove'].isna()]

# Keep only items that are in stock and whose stock value (units * wac_p)
# exceeds 100. `.assign` avoids writing a column into a filtered slice.
tgtg = tgtg.merge(stocks, on=['warehouse_id', 'product_id'])
tgtg = tgtg[tgtg['cu_stocks'] > 0]
tgtg = tgtg.assign(stock_value=tgtg['cu_stocks'] * tgtg['wac_p'])
tgtg = tgtg.sort_values(by='stock_value', ascending=False)
tgtg = tgtg[tgtg['stock_value'] > 100]

# Bring in the margin references. market_data and target_margin are optional
# (left joins); a row without a `stats` match is dropped (inner join).
tgtg = tgtg.merge(market_data, on=['cohort_id', 'product_id'], how='left')
tgtg = tgtg.merge(stats, on=['region', 'product_id'])
tgtg = tgtg.merge(
    market_main_data[['cohort_id', 'product_id', 'target_margin']],
    on=['cohort_id', 'product_id'],
    how='left',
)

# Every remaining NaN (in any column) becomes 1000 before the minimum is taken.
# NOTE(review): presumably a sentinel so a missing reference never wins the
# element-wise minimum below — confirm; if all references are missing the
# result is a scaled 1000, not NaN.
tgtg = tgtg.fillna(1000)

# min_margin is the smallest of the four scaled references.
margin_candidates = [
    tgtg['market_min'] * 0.8,
    tgtg['target_margin'] / 4,
    tgtg['min_boundary'] * 0.9,
    tgtg['optimal_bm'] * 0.75,
]
tgtg['min_margin'] = np.minimum.reduce(margin_candidates)
tgtg['max_margin'] = tgtg['min_margin']

# Snapshot of the full detail before collapsing to the key columns.
tgtg.to_excel("Min_max_data/tgtg.xlsx")

# Collapse to one row per (cohort_id, product_id) using the lowest margin.
# The string 'min' replaces the builtin `min`, whose use in agg is deprecated.
tgtg = tgtg[['cohort_id', 'product_id', 'min_margin', 'max_margin']]
tgtg = (
    tgtg
    .groupby(['cohort_id', 'product_id'])
    .agg({'min_margin': 'min', 'max_margin': 'min'})
    .reset_index()
)
tgtg

Unnamed: 0,cohort_id,product_id,min_margin,max_margin
0,701,1069,0.013468,0.013468
1,701,9353,0.008440,0.008440
2,701,9570,0.037267,0.037267
3,701,10384,0.011544,0.011544
4,701,10667,0.015649,0.015649
...,...,...,...,...
91,1126,12031,0.024704,0.024704
92,1126,12032,0.024704,0.024704
93,1126,12343,0.026500,0.026500
94,1126,12533,0.007619,0.007619


In [36]:
# Label the TGTG rows, then let them replace any row of final_df that has the
# same (product_id, cohort_id): anti-join final_df against the TGTG keys and
# append the TGTG rows underneath.
tgtg['type'] = 'TGTG'
result = (
    final_df
    .merge(
        tgtg[['product_id', 'cohort_id']],
        how='left',
        on=['product_id', 'cohort_id'],
        indicator=True,
    )
    .loc[lambda merged: merged['_merge'] == 'left_only']  # keys absent from tgtg
    .drop(columns=['_merge'])
)

final_df = pd.concat([result, tgtg], axis=0)

In [37]:
# Join the price-up list onto final_df, widen max_margin for price-up rows,
# flag them as enforced, then attach sales columns and the previous bounds.
# NOTE(review): `price_ups` is overwritten with its own merge result, so this
# cell is not safe to run twice on the same kernel.
price_ups = price_ups.merge(regions, on=['region'])
final_df = final_df.merge(price_ups, how='left', on=['product_id', 'cohort_id'])

# Rows with a price-up are the ones where new_pp is populated.
has_new_pp = final_df['new_pp'].notna()

# For non-TGTG price-up rows: max_margin = min(max_margin + 0.15, min_margin + 0.2).
widened_max = np.minimum(final_df['max_margin'] + 0.15, final_df['min_margin'] + 0.2)
final_df.loc[has_new_pp & (final_df['type'] != 'TGTG'), 'max_margin'] = widened_max

# enforce = 1 on every price-up row (TGTG included), NaN elsewhere.
final_df['enforce'] = np.nan
final_df.loc[has_new_pp, 'enforce'] = 1

final_df = final_df.drop_duplicates().merge(sales, how='left', on=['cohort_id', 'product_id'])
final_df = final_df[['cohort_id', 'product_id', 'sku', 'min_margin', 'max_margin', 'enforce', 'brand', 'type']]

# Previous bounds from min_max_df, carried alongside as old_min / old_max.
old_bounds = min_max_df[['cohort_id', 'product_id', 'min_margin', 'max_margin']].rename(
    columns={'min_margin': 'old_min', 'max_margin': 'old_max'}
)
final_df = final_df.merge(old_bounds, how='left', on=['cohort_id', 'product_id'])


In [38]:
# final_df.loc[final_df['product_id'].isin([7630,95]),'min_margin'] = final_df['old_min']
# final_df.loc[final_df['product_id'].isin([7630,95]),'max_margin'] = final_df['old_max']
# final_df

In [39]:
# Write the final min/max margin table (index included) to the shared workbook.
final_df.to_excel(excel_writer='Min_max_data/min_max_data.xlsx')