In [None]:
%%capture

# Upgrade pip
!pip install --upgrade pip
# Connectivity
!pip install psycopg2-binary  # PostgreSQL adapter
# !pip install snowflake-connector-python  # Snowflake connector
!pip install snowflake-connector-python==3.15.0 # Snowflake connector Older Version
!pip install snowflake-sqlalchemy  # Snowflake SQLAlchemy connector
!pip install warnings # Warnings management
# !pip install pyarrow # Serialization
!pip install keyring==23.11.0 # Key management
!pip install sqlalchemy==1.4.46 # SQLAlchemy
!pip install requests # HTTP requests
!pip install boto3 # AWS SDK
# !pip install slackclient # Slack API
!pip install oauth2client # Google Sheets API
!pip install gspread==5.9.0 # Google Sheets API
!pip install gspread_dataframe # Google Sheets API
!pip install google.cloud # Google Cloud
# Data manipulation and analysis
!pip install polars
!pip install pandas==2.2.1
!pip install numpy
# !pip install fastparquet
!pip install openpyxl # Excel file handling
!pip install xlsxwriter # Excel file handling
# Linear programming
!pip install pulp
# Date and time handling
!pip install --upgrade datetime
!pip install python-time
!pip install --upgrade pytz
# Progress bar
!pip install tqdm
# Database data types
!pip install db-dtypes
# Geospatial data handling
# !pip install geopandas
# !pip install shapely
# !pip install fiona
# !pip install haversine
# Plotting

# Modeling
!pip install statsmodels
!pip install scikit-learn

!pip install import-ipynb

In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
from datetime import datetime
import calendar
import json
from datetime import date, timedelta
from oauth2client.service_account import ServiceAccountCredentials
import setup_environment_2
import importlib
import import_ipynb
import warnings
import demand_sku_cntrb
warnings.filterwarnings("ignore")
importlib.reload(setup_environment_2)
setup_environment_2.initialize_env()

  from pandas.core.computation.check import NUMEXPR_INSTALLED


/home/ec2-user/.Renviron
/home/ec2-user/service_account_key.json
/home/ec2-user/.Renviron
/home/ec2-user/service_account_key.json


In [2]:
pd.set_option('display.max_columns', None)

In [3]:
today = datetime.today()- timedelta(days=4)
month_start = today.replace(day=1)
first_part = (today - month_start).days

last_day = calendar.monthrange(today.year, today.month)[1]
second_part = (last_day - today.day)+1

In [4]:
def query_snowflake(query, columns=[]):
    import os
    import snowflake.connector
    import numpy as np
    import pandas as pd
    con = snowflake.connector.connect(
        user =  os.environ["SNOWFLAKE_USERNAME"],
        account= os.environ["SNOWFLAKE_ACCOUNT"],
        password= os.environ["SNOWFLAKE_PASSWORD"],
        database =os.environ["SNOWFLAKE_DATABASE"]
    )
    try:
        cur = con.cursor()
        cur.execute("USE WAREHOUSE COMPUTE_WH")
        cur.execute(query)
        if len(columns) == 0:
            out = pd.DataFrame(np.array(cur.fetchall()))
        else:
            out = pd.DataFrame(np.array(cur.fetchall()),columns=columns)
        return out
    except Exception as e:
        print("Error: ", e)
    finally:
        cur.close()
        con.close()

In [5]:
query = '''
SHOW PARAMETERS LIKE 'TIMEZONE'
'''
x  = query_snowflake(query)
zone_to_use = x[1].values[0]
zone_to_use

'America/Los_Angeles'

## Prodcut Selection

In [6]:
command_string = f'''
with last_update as (
select  DATE_PART('hour', max_date) * 60 + DATE_PART('minute', max_date) AS total_minutes
from (
select max(created_at) as max_date from sales_orders
)

),
 predicted_rr  as (
select product_id,warehouse_id,rr,date
from Finance.PREDICTED_RUNNING_RATES
where date >= CURRENT_DATE
qualify date = max(date)over(partition by product_id,warehouse_id)
),
days_stocks as (
select timestamp::date as date ,product_id,warehouse_id,avg(in_stock) as in_stock_perc,avg(case when date_part('hour',timestamp) =date_part('hour',current_timestamp)-1 then  in_stock end) as last_hour_stocks
from (
select timestamp,product_id,warehouse_id,case when AVAILABLE_STOCK > 0 then 1 else 0 end as in_stock
from materialized_views.STOCK_SNAP_SHOTS_RECENT sss
where sss.timestamp::date >= date_trunc('month',current_date - 60)
and date_part('hour',sss.timestamp)<date_part('hour',CONVERT_TIMEZONE('{zone_to_use}', 'Africa/Cairo', CURRENT_TIMEstamp())) 
and warehouse_id in (1,8,170,236,337,339,401,501,632,703,797,962)
)
group by all 
),
base as (
select *, row_number()over(partition by retailer_id order by priority) as rnk 
from (
select x.*,TAGGABLE_ID as retailer_id 
from (
select id as cohort_id,name as cohort_name,priority,dynamic_tag_id 
from cohorts 
where is_active = 'true'
and id in (700,701,702,703,704,1123,1124,1125,1126)
) x 
join DYNAMIC_TAGgables dt on x.dynamic_tag_id = dt.dynamic_tag_id
)
qualify rnk = 1 
),
sales_data as (
SELECT  DISTINCT
		so.created_at::date as date,
		pso.warehouse_id as warehouse_id,
		pso.product_id,
		CONCAT(products.name_ar,' ',products.size,' ',product_units.name_ar) as sku,
		brands.name_ar as brand, 
		categories.name_ar as cat,
        sum(pso.total_price) as all_day_nmv,
		sum(case when (date_part('hour',so.created_at)*60 + DATE_PART('minute', so.created_at))< (select * from last_update) then pso.total_price end) as uth_nmv,
		sum(case when (date_part('hour',so.created_at)*60 + DATE_PART('minute', so.created_at))
		between (select * from last_update) -60 
		and (select * from last_update)
		then pso.total_price end) as last_hour_nmv,


FROM product_sales_order pso
JOIN sales_orders so ON so.id = pso.sales_order_id
JOIN products on products.id=pso.product_id
JOIN brands on products.brand_id = brands.id 
JOIN categories ON products.category_id = categories.id
JOIN finance.all_cogs f  ON f.product_id = pso.product_id
                        AND f.from_date::date <= so.created_at ::date
                        AND f.to_date::date > so.created_at ::date
JOIN product_units ON product_units.id = products.unit_id 
JOIN materialized_views.retailer_polygon on materialized_views.retailer_polygon.retailer_id=so.retailer_id
JOIN districts on districts.id=materialized_views.retailer_polygon.district_id
JOIN cities on cities.id=districts.city_id
join states on states.id=cities.state_id
join regions on regions.id=states.region_id  

WHERE   True
    AND so.created_at ::date >= date_trunc('month',current_date - 60)
    AND so.sales_order_status_id not in (7,12)
    AND so.channel IN ('telesales','retailer')
    AND pso.purchased_item_count <> 0

GROUP BY ALL
order by date desc
),
data as (
select * , 1/nullif((0.3*week_distance+0.1*month_distance+0.6*day_distance),0) as distance
from (
select * ,
floor((DATE_PART('day', date) - 1) / 7 + 1) AS week_of_month,
DATE_PART('month', date) as month,
DATE_PART('DOW', date) AS day_number,

abs(floor((DATE_PART('day', current_date) - 1) / 7 + 1) - week_of_month)  as week_distance ,
abs(DATE_PART('month', current_date)- month) as month_distance,
abs(DATE_PART('DOW', current_date)- day_number) as day_distance
from (
select *, max(case when date = CURRENT_DATE then last_hour_stocks end) over(partition by product_id,warehouse_id) as current_stocks 
from (
select ds.*, all_day_nmv,
uth_nmv,
last_hour_nmv
from days_stocks ds
left join sales_data sd  on ds.product_id = sd.product_id and ds.warehouse_id = sd.warehouse_id and ds.date= sd.date
)
)
where current_stocks <> 0 
and (in_stock_perc = 1 or date = CURRENT_DATE)
)
),
current_state as (
select product_id,warehouse_id,AVAILABLE_STOCK,activation
from PRODUCT_WAREHOUSE
where IS_BASIC_UNIT = 1
)
select x.*,
cs.AVAILABLE_STOCK,
cs.activation,
coalesce(prr.rr,0) as rr,
case when coalesce(prr.rr,0) <>0 then cs.AVAILABLE_STOCK/coalesce(prr.rr,0) else cs.AVAILABLE_STOCK end  as doh ,
cs.AVAILABLE_STOCK*f.wac1  as stock_value
 from (
select product_id,warehouse_id,
coalesce(max(case when state = 'prev' then all_day_nmv end),0) as prev_all_day,
coalesce(max(case when state = 'prev' then uth_nmv end),0)  as prev_uth,
coalesce(max(case when state = 'prev' then last_hour_nmv end),0)  as prev_last_hour,

coalesce(max(case when state = 'current' then all_day_nmv end),0)  as current_all_day,
coalesce(max(case when state = 'current' then uth_nmv end),0)  as current_uth,
coalesce(max(case when state = 'current' then last_hour_nmv end),0)  as current_last_hour

from (
select 'current' as state,product_id,warehouse_id,all_day_nmv,uth_nmv,last_hour_nmv
from data
where date = CURRENT_DATE
union all 
(
select state,product_id,warehouse_id,
sum(all_day_nmv*distance)/sum(distance) as all_day_nmv,
sum(uth_nmv*distance)/sum(distance) as uth_nmv,
sum(last_hour_nmv*distance)/sum(distance) as last_hour_nmv
from(
select 'prev' as state,product_id,warehouse_id,all_day_nmv,uth_nmv,last_hour_nmv,distance
from data 
where date <> CURRENT_DATE
)
group by all 
)
)
group by all 
)x 
join current_state cs on x.product_id = cs.product_id and x.warehouse_id = cs.warehouse_id
left join predicted_rr prr on x.product_id = prr.product_id and x.warehouse_id = prr.warehouse_id 
join products p on p.id = x.product_id
join finance.all_cogs f on f.product_id = x.product_id and CONVERT_TIMEZONE('{zone_to_use}', 'Africa/Cairo', CURRENT_TIMEstamp()) between f.from_date and f.to_date 
where doh > 1 
and p.activation ='true'
and cs.activation = 'true'
and cs.AVAILABLE_STOCK * f.wac1 >= 1000
and prev_uth > 0
'''
product_data = query_snowflake(command_string, columns = ['product_id','warehouse_id','prev_all_day','prev_uth','prev_last_hour','current_all_day','current_uth','current_last_hour','available_stock','activation','rr','doh','stock_value'])
product_data.product_id = pd.to_numeric(product_data.product_id)
product_data.warehouse_id = pd.to_numeric(product_data.warehouse_id)
product_data.prev_all_day = pd.to_numeric(product_data.prev_all_day)
product_data.prev_uth = pd.to_numeric(product_data.prev_uth)
product_data.prev_last_hour = pd.to_numeric(product_data.prev_last_hour)
product_data.current_all_day = pd.to_numeric(product_data.current_all_day)
product_data.current_uth = pd.to_numeric(product_data.current_uth)
product_data.current_last_hour = pd.to_numeric(product_data.current_last_hour)
product_data.available_stock = pd.to_numeric(product_data.available_stock)
product_data.rr = pd.to_numeric(product_data.rr)
product_data.doh = pd.to_numeric(product_data.doh)
product_data.stock_value = pd.to_numeric(product_data.stock_value)

In [7]:
query = f'''
with last_update as (
select  DATE_PART('hour', max_date) * 60 + DATE_PART('minute', max_date) AS total_minutes
from (
select max(created_at) as max_date from sales_orders
)
),
base as (
select *, row_number()over(partition by retailer_id order by priority) as rnk 
from (
select x.*,TAGGABLE_ID as retailer_id 
from (
select id as cohort_id,name as cohort_name,priority,dynamic_tag_id 
from cohorts 
where is_active = 'true'
and id in (700,701,702,703,704,1123,1124,1125,1126)
) x 
join DYNAMIC_TAGgables dt on x.dynamic_tag_id = dt.dynamic_tag_id
)
qualify rnk = 1 
),
sales as (
SELECT 
		so.created_at::date as date,
		pso.warehouse_id as warehouse_id,
        sum(pso.total_price) as all_day_nmv,
		sum(case when (date_part('hour',so.created_at)*60 + DATE_PART('minute', so.created_at))< (select * from last_update) then pso.total_price end) as uth_nmv,
		sum(case when (date_part('hour',so.created_at)*60 + DATE_PART('minute', so.created_at))
		between (select * from last_update) -60 
		and (select * from last_update)
		then pso.total_price end) as last_hour_nmv,


FROM product_sales_order pso
JOIN sales_orders so ON so.id = pso.sales_order_id
JOIN products on products.id=pso.product_id
JOIN brands on products.brand_id = brands.id 
JOIN categories ON products.category_id = categories.id
JOIN finance.all_cogs f  ON f.product_id = pso.product_id
                        AND f.from_date::date <= so.created_at ::date
                        AND f.to_date::date > so.created_at ::date
JOIN product_units ON product_units.id = products.unit_id 
JOIN materialized_views.retailer_polygon on materialized_views.retailer_polygon.retailer_id=so.retailer_id
JOIN districts on districts.id=materialized_views.retailer_polygon.district_id
JOIN cities on cities.id=districts.city_id
join states on states.id=cities.state_id
join regions on regions.id=states.region_id  

WHERE   True
    AND so.created_at ::date between date_trunc('month',current_date - 60) and current_date -1 
    AND so.sales_order_status_id not in (7,12)
    AND so.channel IN ('telesales','retailer')
    AND pso.purchased_item_count <> 0

GROUP BY ALL
order by date desc
)
select warehouse_id,sum(uth_cntrb*distance)/sum(distance) as uth_cntrb
from (
select *, 1/nullif((0.3*week_distance+0.1*month_distance+0.6*day_distance),0) as distance
from(
select * ,uth_nmv/all_day_nmv as uth_cntrb,
floor((DATE_PART('day', date) - 1) / 7 + 1) AS week_of_month,
DATE_PART('month', date) as month,
DATE_PART('DOW', date) AS day_number,
abs(floor((DATE_PART('day', current_date) - 1) / 7 + 1) - week_of_month)  as week_distance ,
abs(DATE_PART('month', current_date)- month) as month_distance,
abs(DATE_PART('DOW', current_date)- day_number) as day_distance

from sales 
)
)
group by all 
'''
uth_cntrb = query_snowflake(query, columns = ['warehouse_id','uth_cntrb'])
uth_cntrb.warehouse_id = pd.to_numeric(uth_cntrb.warehouse_id)

In [8]:
query = f'''
WITH whs as (SELECT *
             FROM   (values
                            ('Cairo', 'El-Marg', 38,700),
                            ('Cairo', 'Mostorod', 1,700),
                            ('Giza', 'Barageel', 236,701),
                            ('Delta West', 'El-Mahala', 337,703),
                            ('Delta West', 'Tanta', 8,703),
                            ('Delta East', 'Mansoura FC', 339,704),
                            ('Delta East', 'Sharqya', 170,704),
                            ('Upper Egypt', 'Assiut FC', 501,1124),
                            ('Upper Egypt', 'Bani sweif', 401,1126),
                            ('Upper Egypt', 'Menya Samalot', 703,1123),
                            ('Upper Egypt', 'Sohag', 632,1125),
                            ('Alexandria', 'Khorshed Alex', 797,702),
							('Giza', 'Sakkarah', 962,701)
							
							)
                    x(region, wh, warehouse_id,cohort_id)),


local_prices as (
SELECT  case when cpu.cohort_id in (700,695) then 'Cairo'
             when cpu.cohort_id in (701) then 'Giza'
             when cpu.cohort_id in (704,698) then 'Delta East'
             when cpu.cohort_id in (703,697) then 'Delta West'
             when cpu.cohort_id in (696,1123,1124,1125,1126) then 'Upper Egypt'
             when cpu.cohort_id in (702,699) then 'Alexandria'
        end as region,
		cohort_id,
        pu.product_id,
		pu.packing_unit_id as packing_unit_id,
		pu.basic_unit_count,
        avg(cpu.price) as price
FROM    cohort_product_packing_units cpu
join    PACKING_UNIT_PRODUCTS pu on pu.id = cpu.product_packing_unit_id
WHERE   cpu.cohort_id in (700,701,702,703,704,696,695,698,697,699,1123,1124,1125,1126)
    and cpu.created_at::date<>'2023-07-31'
    and cpu.is_customized = true
	group by all 
),
live_prices as (
select region,cohort_id,product_id,pu_id as packing_unit_id,buc as basic_unit_count,NEW_PRICE as price
from materialized_views.DBDP_PRICES
where created_at = current_date
and DATE_PART('hour',CONVERT_TIMEZONE('{zone_to_use}', 'Africa/Cairo', CURRENT_TIMEstamp())) BETWEEN SPLIT_PART(time_slot, '-', 1)::int AND SPLIT_PART(time_slot, '-', 2)::int
and cohort_id in (700,701,702,703,704,696,695,698,697,699,1123,1124,1125,1126)
),
prices as (
select *
from (
    SELECT *, 1 AS priority FROM live_prices
    UNION ALL
    SELECT *, 2 AS priority FROM local_prices
)
QUALIFY ROW_NUMBER() OVER (PARTITION BY region,cohort_id,product_id,packing_unit_id ORDER BY priority) = 1
)
select warehouse_id,product_id,price 
from prices 
join whs on prices.cohort_id = whs.cohort_id
and basic_unit_count = 1 
'''
product_warehouse_price = query_snowflake(query, columns = ['warehouse_id','product_id','price'])
product_warehouse_price.warehouse_id = pd.to_numeric(product_warehouse_price.warehouse_id)
product_warehouse_price.product_id = pd.to_numeric(product_warehouse_price.product_id)
product_warehouse_price.price = pd.to_numeric(product_warehouse_price.price)

In [9]:
query = f'''
WITH whs as (SELECT *
             FROM   (values
                            ('Cairo', 'El-Marg', 38,700),
                            ('Cairo', 'Mostorod', 1,700),
                            ('Giza', 'Barageel', 236,701),
                            ('Delta West', 'El-Mahala', 337,703),
                            ('Delta West', 'Tanta', 8,703),
                            ('Delta East', 'Mansoura FC', 339,704),
                            ('Delta East', 'Sharqya', 170,704),
                            ('Upper Egypt', 'Assiut FC', 501,1124),
                            ('Upper Egypt', 'Bani sweif', 401,1126),
                            ('Upper Egypt', 'Menya Samalot', 703,1123),
                            ('Upper Egypt', 'Sohag', 632,1125),
                            ('Alexandria', 'Khorshed Alex', 797,702),
							('Giza', 'Sakkarah', 962,701)
							
							)
                    x(region, wh, warehouse_id,cohort_id)),
full_data as (
select products.id as product_id, region,warehouse_id
from products , whs 
where activation = 'true'
),				

MP as (
select region,product_id,
min(min_price) as min_price,
min(max_price) as max_price,
min(mod_price) as mod_price,
min(true_min) as true_min,
min(true_max) as true_max

from (
select mp.region,mp.product_id,mp.pu_id,
min_price/BASIC_UNIT_COUNT as min_price,
max_price/BASIC_UNIT_COUNT as max_price,
mod_price/BASIC_UNIT_COUNT as mod_price,
TRUE_MIN_PRICE/BASIC_UNIT_COUNT as true_min,
TRUE_MAX_PRICE/BASIC_UNIT_COUNT as true_max
from materialized_views.marketplace_prices mp 
join packing_unit_products pup on pup.product_id = mp.product_id and pup.packing_unit_id = mp.pu_id
)
group by all 
),
region_mapping AS (
    SELECT * 
	FROM 
	(	VALUES
        ('Delta East', 'Delta West'),
        ('Delta West', 'Delta East'),
        ('Alexandria', 'Cairo'),
        ('Alexandria', 'Giza'),
        ('Upper Egypt', 'Cairo'),
        ('Upper Egypt', 'Giza'),
		('Cairo','Giza'),
		('Giza','Cairo'),
		('Delta West', 'Cairo'),
		('Delta East', 'Cairo'),
		('Delta West', 'Giza'),
		('Delta East', 'Giza')
		)
    AS region_mapping(region, fallback_region)
)


select region,warehouse_id,product_id,
min(final_min_price) as final_min_price,
min(final_max_price) as final_max_price,
min(final_mod_price) as final_mod_price,
min(final_true_min) as final_true_min,
min(final_true_max) as final_true_max

from (
SELECT
distinct 
	w.region,
    w.warehouse_id,
	w.product_id,
    COALESCE(m1.min_price, m2.min_price) AS final_min_price,
    COALESCE(m1.max_price, m2.max_price) AS final_max_price,
    COALESCE(m1.mod_price, m2.mod_price) AS final_mod_price,
	COALESCE(m1.true_min, m2.true_min) AS final_true_min,
	COALESCE(m1.true_max, m2.true_max) AS final_true_max,
FROM full_data w
LEFT JOIN MP m1
    ON w.region = m1.region and w.product_id = m1.product_id
JOIN region_mapping rm
    ON w.region = rm.region
LEFT JOIN MP m2
    ON rm.fallback_region = m2.region
   AND w.product_id = m2.product_id
)
where final_min_price is not null 
group by all 
'''
marketplace = query_snowflake(query, columns = ['REGION','WAREHOUSE_ID','PRODUCT_ID','FINAL_MIN_PRICE','FINAL_MAX_PRICE','FINAL_MOD_PRICE','FINAL_TRUE_MIN','FINAL_TRUE_MAX'])
marketplace.columns = marketplace.columns.str.lower()
for col in marketplace.columns:
    marketplace[col] = pd.to_numeric(marketplace[col], errors='ignore')

In [10]:
query = f'''
select product_id,min(ben_soliman_basic_price) as ben_soliman_price
from (
select MAXAB_PRODUCT_ID as product_id,
MAXAB_BASIC_UNIT_COUNT as buc,
FINAL_PRICE_AFTER_CONVERSION as ben_soliman_price,
ben_soliman_price/buc as ben_soliman_basic_price,
max(INJECTION_DATE) OVER(PARTITION by product_id) as max_date
from materialized_views.savvy_mapping
where FINAL_PRICE_AFTER_CONVERSION is not null 
QUALIFY INJECTION_DATE = max_date
)
group by all 
'''

bensoliman = query_snowflake(query, columns = ['product_id','ben_soliman_basic_price'])
bensoliman.columns = bensoliman.columns.str.lower()
for col in bensoliman.columns:
    bensoliman[col] = pd.to_numeric(bensoliman[col], errors='ignore')     

In [11]:
query = f'''
select region,product_id,optimal_bm,MIN_BOUNDARY,MAX_BOUNDARY,MEDIAN_BM
from (
select region,product_id,target_bm,optimal_bm,MIN_BOUNDARY,MAX_BOUNDARY,MEDIAN_BM,max(created_at) over(partition by product_id,region) as max_date,created_at
from materialized_views.PRODUCT_STATISTICS
where created_at::date >= date_trunc('month',current_date - 60)
qualify max_date = created_at
)

'''
 
stats = query_snowflake(query, columns = ['region','product_id','optimal_bm','MIN_BOUNDARY','MAX_BOUNDARY','MEDIAN_BM'])
stats.columns = stats.columns.str.lower()
for col in stats.columns:
    stats[col] = pd.to_numeric(stats[col], errors='ignore')

In [12]:
query = f'''
select warehouse_id,region
from (
select * ,row_number()over(partition by warehouse_id order by nmv desc) as rnk 
from (
SELECT case when regions.id = 2 then cities.name_en else regions.name_en end as region,
	   pso.warehouse_id,
        sum(pso.total_price) as nmv



FROM product_sales_order pso
JOIN sales_orders so ON so.id = pso.sales_order_id
JOIN materialized_views.retailer_polygon on materialized_views.retailer_polygon.retailer_id=so.retailer_id
JOIN districts on districts.id=materialized_views.retailer_polygon.district_id
JOIN cities on cities.id=districts.city_id
join states on states.id=cities.state_id
join regions on regions.id=states.region_id             

WHERE   True
    AND so.created_at ::date between current_date-31 and CURRENT_DATE-1
    AND so.sales_order_status_id not in (7,12)
    AND so.channel IN ('telesales','retailer')
    AND pso.purchased_item_count <> 0

GROUP BY ALL
)
qualify rnk = 1 
)
'''
warehouse_region = query_snowflake(query, columns = ['warehouse_id','region'])
warehouse_region.columns = warehouse_region.columns.str.lower()
for col in warehouse_region.columns:
    warehouse_region[col] = pd.to_numeric(warehouse_region[col], errors='ignore')    

In [13]:
query = f'''
SELECT DISTINCT cat, brand, margin as target_bm
FROM    performance.commercial_targets cplan
QUALIFY CASE WHEN DATE_TRUNC('month', MAX(DATE)OVER()) = DATE_TRUNC('month', CURRENT_DATE) THEN DATE_TRUNC('month', CURRENT_DATE)
ELSE DATE_TRUNC('month', CURRENT_DATE - INTERVAL '1 month') END = DATE_TRUNC('month', date)
'''
brand_cat_target  = query_snowflake(query, columns = ['cat','brand','target_bm'])
brand_cat_target.target_bm=pd.to_numeric(brand_cat_target.target_bm)

query = f'''
select cat,sum(target_bm *(target_nmv/cat_total)) as cat_target_margin
from (
select *,sum(target_nmv)over(partition by cat) as cat_total
from (
select cat,brand,avg(target_bm) as target_bm , sum(target_nmv) as target_nmv
from (
SELECT DISTINCT date,city as region,cat, brand, margin as target_bm,nmv as target_nmv
FROM    performance.commercial_targets cplan
QUALIFY CASE WHEN DATE_TRUNC('month', MAX(DATE)OVER()) = DATE_TRUNC('month', CURRENT_DATE) THEN DATE_TRUNC('month', CURRENT_DATE)
ELSE DATE_TRUNC('month', CURRENT_DATE - INTERVAL '1 month') END = DATE_TRUNC('month', date)
)
group by all
)
)
group by all 
'''
cat_target  = query_snowflake(query, columns = ['cat','cat_target_margin'])
cat_target.cat_target_margin=pd.to_numeric(cat_target.cat_target_margin)

query = f'''
SELECT  DIStinct  
		products.id as product_id,
		CONCAT(products.name_ar,' ',products.size,' ',product_units.name_ar) as sku,
		brands.name_ar as brand, 
		categories.name_ar as cat,
		f.wac_p
from products 
JOIN brands on products.brand_id = brands.id 
JOIN categories ON products.category_id = categories.id
JOIN finance.all_cogs f  ON f.product_id = products.id and CONVERT_TIMEZONE('{zone_to_use}', 'Africa/Cairo', CURRENT_TIMEstamp()) between f.from_date and f.to_date 
JOIN product_units ON product_units.id = products.unit_id 
'''
sku_info  = query_snowflake(query, columns = ['product_id','sku','brand','cat','wac_p'])
sku_info.product_id=pd.to_numeric(sku_info.product_id)
sku_info.wac_p=pd.to_numeric(sku_info.wac_p)

In [14]:
query = f'''
with last_update as (
select  DATE_PART('hour', max_date) * 60 + DATE_PART('minute', max_date) AS total_minutes
from (
select max(created_at) as max_date from sales_orders
)
)
SELECT  DISTINCT
		pso.warehouse_id,
		pso.product_id,
		coalesce(sum(case when DATE_PART('hour', so.created_at) * 60 + DATE_PART('minute', so.created_at)  between  (DATE_PART('hour', CONVERT_TIMEZONE('{zone_to_use}', 'Africa/Cairo', CURRENT_TIMEstamp())) * 60 + DATE_PART('minute', CONVERT_TIMEZONE('{zone_to_use}', 'Africa/Cairo', CURRENT_TIMEstamp())))-120 and (DATE_PART('hour', CONVERT_TIMEZONE('{zone_to_use}', 'Africa/Cairo', CURRENT_TIMEstamp())) * 60 + DATE_PART('minute', CONVERT_TIMEZONE('{zone_to_use}', 'Africa/Cairo', CURRENT_TIMEstamp())))-60 then pso.total_price end),0) as t_2_nmv,
		coalesce(sum(case when DATE_PART('hour', so.created_at) * 60 + DATE_PART('minute', so.created_at)  >  (DATE_PART('hour', CONVERT_TIMEZONE('{zone_to_use}', 'Africa/Cairo', CURRENT_TIMEstamp())) * 60 + DATE_PART('minute', CONVERT_TIMEZONE('{zone_to_use}', 'Africa/Cairo', CURRENT_TIMEstamp())))-60 then pso.total_price end),0) as t_1_nmv,
		coalesce(avg(case when (DATE_PART('hour', so.created_at) * 60 + DATE_PART('minute', so.created_at)  between  (DATE_PART('hour', CONVERT_TIMEZONE('{zone_to_use}', 'Africa/Cairo', CURRENT_TIMEstamp())) * 60 + DATE_PART('minute', CONVERT_TIMEZONE('{zone_to_use}', 'Africa/Cairo', CURRENT_TIMEstamp())))-120 and (DATE_PART('hour', CONVERT_TIMEZONE('{zone_to_use}', 'Africa/Cairo', CURRENT_TIMEstamp())) * 60 + DATE_PART('minute', CONVERT_TIMEZONE('{zone_to_use}', 'Africa/Cairo', CURRENT_TIMEstamp())))-60) and (item_discount_value <> 0) then (pso.item_price/basic_unit_count) - (pso.item_discount_value/basic_unit_count) end),0) as t_2_price,
		coalesce(avg(case when (DATE_PART('hour', so.created_at) * 60 + DATE_PART('minute', so.created_at)  >  (DATE_PART('hour', CONVERT_TIMEZONE('{zone_to_use}', 'Africa/Cairo', CURRENT_TIMEstamp())) * 60 + DATE_PART('minute', CONVERT_TIMEZONE('{zone_to_use}', 'Africa/Cairo', CURRENT_TIMEstamp())))-60) and (item_discount_value <> 0) then  (pso.item_price/basic_unit_count) - (pso.item_discount_value/basic_unit_count) end),0) as t_1_price



FROM product_sales_order pso 
JOIN sales_orders so ON so.id = pso.sales_order_id          

WHERE so.created_at::date = current_date 
and DATE_PART('hour', so.created_at) * 60 + DATE_PART('minute', so.created_at)  >=  (DATE_PART('hour', CONVERT_TIMEZONE('{zone_to_use}', 'Africa/Cairo', CURRENT_TIMEstamp())) * 60 + DATE_PART('minute', CONVERT_TIMEZONE('{zone_to_use}', 'Africa/Cairo', CURRENT_TIMEstamp())))-120
    AND so.sales_order_status_id not in (7,12)
    AND so.channel IN ('telesales','retailer')
    AND pso.purchased_item_count <> 0
GROUP BY ALL
'''
last_two_hours =  query_snowflake(query, columns = ['warehouse_id','product_id','t_2_nmv','t_1_nmv','t_2_price','t_1_price'])
last_two_hours.columns = last_two_hours.columns.str.lower()
for col in last_two_hours.columns:
    last_two_hours[col] = pd.to_numeric(last_two_hours[col], errors='ignore')     

In [15]:
product_data = product_data.merge(product_warehouse_price,on=['product_id','warehouse_id'])
product_data = product_data.merge(uth_cntrb[['warehouse_id','uth_cntrb']],on='warehouse_id')
product_data['product_UTH_growth'] =(product_data['current_uth'] -product_data['prev_uth'])/product_data['prev_uth']
product_data['product_LH_growth'] =(product_data['current_last_hour'] -product_data['prev_last_hour'])/product_data['prev_last_hour']
product_data[['product_UTH_growth','product_LH_growth']] =product_data[['product_UTH_growth','product_LH_growth']].fillna(0) 
product_data = product_data.replace([np.inf, -np.inf], 1)
product_data['product_closing_growth'] = (product_data['product_UTH_growth']*product_data['uth_cntrb'])+(product_data['product_LH_growth']*(1-product_data['uth_cntrb']))

In [16]:
warehouse_data = product_data.groupby('warehouse_id')[['prev_all_day', 'prev_uth','prev_last_hour', 'current_all_day', 'current_uth', 'current_last_hour']].sum().reset_index()
warehouse_data['UTH_growth'] =(warehouse_data['current_uth'] -warehouse_data['prev_uth'])/warehouse_data['prev_uth']
warehouse_data['LH_growth'] =(warehouse_data['current_last_hour'] -warehouse_data['prev_last_hour'])/warehouse_data['prev_last_hour']
warehouse_data = warehouse_data.merge(uth_cntrb,on='warehouse_id')
warehouse_data['Closing_growth'] = (warehouse_data['UTH_growth']*warehouse_data['uth_cntrb'])+(warehouse_data['LH_growth']*(1-warehouse_data['uth_cntrb']))
dropping_whs = warehouse_data[warehouse_data['Closing_growth']<0]

In [17]:
growing_products  = product_data.merge(warehouse_data[['warehouse_id','UTH_growth','LH_growth','Closing_growth']],on='warehouse_id')
#needs edit
growing_products = growing_products[growing_products['product_closing_growth']>=np.maximum(growing_products['Closing_growth'],0.1)]
growing_products['max_closing'] = growing_products.groupby('product_id')['product_closing_growth'].transform(sum)
growing_products=growing_products[growing_products['max_closing']==growing_products['product_closing_growth']]
growing_products = growing_products.groupby(['product_id'])['price'].mean().reset_index()
growing_products.columns = ['product_id','maxab_good_price']

In [18]:
dropping_products = product_data.merge(dropping_whs[['warehouse_id','UTH_growth','LH_growth','Closing_growth']],on='warehouse_id')
dropping_products = dropping_products[dropping_products['product_closing_growth'] < 0]
dropping_products = dropping_products.sort_values(by='prev_all_day',ascending = False)

dropping_products = dropping_products.merge(growing_products,on='product_id',how='left')
dropping_products = dropping_products.merge(marketplace,on=['product_id','warehouse_id'],how='left')
dropping_products = dropping_products.merge(bensoliman[['product_id','ben_soliman_basic_price']],on=['product_id'],how='left')
dropping_products = dropping_products.drop(columns = 'region')
dropping_products = dropping_products.merge(warehouse_region,on=['warehouse_id'])
dropping_products = dropping_products.merge(stats,on=['product_id','region'],how='left')
dropping_products = dropping_products.merge(sku_info,on=['product_id'])
dropping_products = dropping_products.merge(brand_cat_target,on=['brand','cat'],how='left')
dropping_products = dropping_products.merge(cat_target,on=['cat'],how='left')
dropping_products['Target_margin'] = dropping_products['target_bm'].fillna(dropping_products['cat_target_margin'])
dropping_products = dropping_products[[ 'warehouse_id','product_id','sku','brand','cat', 'prev_all_day', 'prev_uth',
       'prev_last_hour', 'current_all_day', 'current_uth', 'current_last_hour','product_UTH_growth', 'product_LH_growth',
       'product_closing_growth','doh','wac_p','price','maxab_good_price', 'final_min_price', 'final_max_price',
       'final_mod_price', 'final_true_min', 'final_true_max',
       'ben_soliman_basic_price','optimal_bm', 'min_boundary',
       'max_boundary', 'median_bm','Target_margin']]
dropping_products = dropping_products.merge(last_two_hours,on=['product_id','warehouse_id'],how='left')
dropping_products[['t_2_nmv','t_1_nmv','t_2_price','t_1_price']] = dropping_products[['t_2_nmv','t_1_nmv','t_2_price','t_1_price']].fillna(0)
dropping_products=dropping_products.drop_duplicates()

In [21]:
def select_price(product_UTH_growth,product_LH_growth,product_closing_growth,remaining_prices,price,wac,Target_margin,min_boundary,optimal_bm,t_1_price):
    target_price = 0 
    min_price = 0
    max_price = 0 
    acceptable = []
    source = ''
    for i in range(len(remaining_prices)-1,-1,-1):
        new_price = remaining_prices[i]
        diff = (new_price-price)/price
        current_margin = (price-wac)/price
        new_margin = (new_price-wac)/new_price
        if new_margin >= min_boundary and new_margin >= 0.25*Target_margin and new_margin >0  and diff <= -0.0025:
            target_price = new_price
            source = 'Listed'
            break
    if target_price == 0:
        
        for j in range(0,len(remaining_prices)):
            new_price = remaining_prices[j]
            diff = (new_price-price)/price
            current_margin = (price-wac)/price
            new_margin = (new_price-wac)/new_price
            if new_margin >0 :
                acceptable.append(new_price)
        if(len(acceptable) > 1):
            distance_arr = []
            for k in range(0,len(acceptable)):
                new_price = acceptable[k]
                diff = 1/abs(price-new_price)
                distance_arr.append(diff)
            
            total_array = sum(distance_arr)
            normalized = [x / total_array for x in distance_arr]
            final_value = 0 
            for v in range(0,len(normalized)):
                w = normalized[v]
                p = acceptable[v]
                final_value+= (w*p)
            target_price = np.maximum(final_value,wac/(1-(0.3*Target_margin)))
            source = 'induced'
            
        elif (len(acceptable) > 0):
            new_price = acceptable[0]
            final_value = (0.3*new_price)+(0.7*price)
            target_price = np.maximum(final_value,wac/(1-(0.3*Target_margin)))
            source = 'induced'
                
              
               
    return target_price,source    
            

In [22]:
product_final_df = pd.DataFrame(columns = ['warehouse_id', 'product_id', 'sku', 'brand', 'cat', 'prev_all_day',
       'prev_uth', 'prev_last_hour', 'current_all_day', 'current_uth',
       'current_last_hour', 'product_UTH_growth', 'product_LH_growth',
       'product_closing_growth', 'doh', 'wac_p', 'price', 'maxab_good_price',
       'final_min_price', 'final_max_price', 'final_mod_price',
       'final_true_min', 'final_true_max', 'ben_soliman_basic_price',
       'optimal_bm', 'min_boundary', 'max_boundary', 'median_bm',
       'Target_margin', 't_2_nmv', 't_1_nmv', 't_2_price', 't_1_price','selected_price','source'])

for _,row in dropping_products.iterrows():
    wac = row['wac_p']
    price = row['price']
    maxab_good_price = row['maxab_good_price']
    final_min_price = row['final_min_price']
    final_max_price = row['final_max_price']
    final_mod_price = row['final_mod_price']
    final_true_min = row['final_true_min']
    final_true_max = row['final_true_max']
    ben_soliman_basic_price= row['ben_soliman_basic_price']
    optimal_price = wac/(1-row['optimal_bm'])
    min_b_price = wac/(1-row['min_boundary'])
    max_b_price = wac/(1-row['max_boundary'])
    median_price = wac/(1-row['median_bm'])
    target_price = wac/(1-row['Target_margin'])
    product_UTH_growth = row['product_UTH_growth']
    product_LH_growth = row['product_LH_growth']
    product_closing_growth=row['product_closing_growth']
    t_1_price = row['t_1_price']
    prices_list = [maxab_good_price,final_min_price,final_max_price,final_mod_price,final_true_min,final_true_max,ben_soliman_basic_price,optimal_price,min_b_price,max_b_price,median_price,target_price]
    cleaned_prices = list({x for x in prices_list if x not in [0, np.nan] and not pd.isna(x)})
    if t_1_price>0 and product_UTH_growth<product_LH_growth and product_LH_growth > 0:
        row['selected_price'] = t_1_price
        row['source'] == 'Prev_disc'
    else:
        remaining_prices = [x for x in cleaned_prices if (x < price) or (t_1_price > 0 and x <= t_1_price)]
        remaining_prices.sort()
        new_price,source = select_price(product_UTH_growth,product_LH_growth,product_closing_growth,remaining_prices,price,wac,row['Target_margin'],row['min_boundary'],row['optimal_bm'],t_1_price)
        row['selected_price'] = new_price
        row['source'] = source
        
        
    product_final_df = pd.concat([product_final_df, row.to_frame().T], ignore_index=True)



In [23]:
product_final_df['discount'] = abs((product_final_df['selected_price']-product_final_df['price'])/product_final_df['price'])
product_final_df = product_final_df[(product_final_df['discount'] > 0.002)&(product_final_df['selected_price']>0)]
product_final_df['discount'] = np.minimum(product_final_df['discount'],0.05)

In [24]:
product_final_df['tuple'] = product_final_df[["product_id",'warehouse_id']].apply(tuple, axis=1)
selected_skus_tuple = str(list(product_final_df['tuple']))[1:-1]
product_final_df=product_final_df.drop(columns = 'tuple')

## Retailers Selection

In [25]:
query = f'''
with selected_prods as (
select * 
from(
VALUES
{selected_skus_tuple}
)x(product_id,warehouse_id)
),
selected_districts as (
select distinct sp.warehouse_id , sp.product_id,dp.district_id  
from WAREHOUSE_DISPATCHING_RULES wdr 
join DISPATCHING_POLYGONS dp on dp.id = wdr.DISPATCHING_POLYGON_ID
join selected_prods sp on sp.product_id = wdr.product_id and wdr.warehouse_id = sp.warehouse_id
),
sales_before as (
select retailer_id,product_id,warehouse_id,district_id,avg(nmv) as avg_nmv_before
from (
SELECT  DISTINCT
so.id as order_id ,
sd.district_id,
sd.warehouse_id as warehouse_id,
pso.product_id as product_id,
so.retailer_id as retailer_id,
sum(pso.total_price) as nmv 


FROM product_sales_order pso
JOIN sales_orders so ON so.id = pso.sales_order_id
JOIN materialized_views.retailer_polygon on materialized_views.retailer_polygon.retailer_id=so.retailer_id
JOIN districts on districts.id=materialized_views.retailer_polygon.district_id
join selected_districts sd on sd.product_id = pso.product_id and sd.district_id = districts.id
           

WHERE   True
    AND so.created_at ::date between current_date - 120 and current_date - 31
    AND so.sales_order_status_id not in (7,12)
    AND so.channel IN ('telesales','retailer')
    AND pso.purchased_item_count <> 0
	
GROUP BY ALL
)
group by all 
),
sales_after as (
select retailer_id,product_id,warehouse_id,district_id,avg(nmv) as avg_nmv_after,max(order_date) as last_order
from (
SELECT  DISTINCT
so.id as order_id ,
so.created_at::date as order_date,
sales_order_status_id, 
sd.district_id,
sd.warehouse_id as warehouse_id,
pso.product_id as product_id,
so.retailer_id as retailer_id,
sum(pso.total_price) as nmv 


FROM product_sales_order pso
JOIN sales_orders so ON so.id = pso.sales_order_id
JOIN materialized_views.retailer_polygon on materialized_views.retailer_polygon.retailer_id=so.retailer_id
JOIN districts on districts.id=materialized_views.retailer_polygon.district_id
join selected_districts sd on sd.product_id = pso.product_id and sd.district_id = districts.id
           

WHERE   True
    AND so.created_at ::date > current_date - 31
    AND so.sales_order_status_id not in (7,12)
    AND so.channel IN ('telesales','retailer')
    AND pso.purchased_item_count <> 0
	
GROUP BY ALL
)
group by all 

),
made_order as (
select distinct so.retailer_id


FROM  sales_orders so 
JOIN materialized_views.retailer_polygon on materialized_views.retailer_polygon.retailer_id=so.retailer_id
JOIN districts on districts.id=materialized_views.retailer_polygon.district_id
join selected_districts sd on sd.district_id = districts.id
           

WHERE   True
    AND so.created_at ::date > current_date - 31
    AND so.sales_order_status_id not in (7,12)
    AND so.channel IN ('telesales','retailer')
	
GROUP BY ALL
)

select distinct retailer_id , product_id,warehouse_id 
from (
select sb.* , coalesce(avg_nmv_after,0) as nmv_after,(nmv_after-avg_nmv_before)/avg_nmv_before as growth
from sales_before sb 
left join sales_after sa on sb.retailer_id = sa.retailer_id and sb.product_id = sa.product_id
left join made_order mo on mo.retailer_id = sa.retailer_id 
where growth < -0.6
and (current_date - last_order >=5 or last_order is null)
and mo.retailer_id is not null 
)
'''
churned_dropped =  query_snowflake(query, columns = ['retailer_id','product_id','warehouse_id'])
churned_dropped.columns = churned_dropped.columns.str.lower()
for col in churned_dropped.columns:
    churned_dropped[col] = pd.to_numeric(churned_dropped[col], errors='ignore')  
churned_dropped    

Unnamed: 0,retailer_id,product_id,warehouse_id
0,401857,346,962
1,850133,10719,8
2,272848,7004,170
3,362447,2049,632
4,749222,8915,962
...,...,...,...
1877,40745,5083,962
1878,108884,2327,8
1879,104784,2912,337
1880,503595,1446,962


In [None]:
query = '''
with retailer_cohort as (
select *, row_number()over(partition by retailer_id order by priority) as rnk 
from (
select x.*,TAGGABLE_ID as retailer_id 
from (
select id as cohort_id,name as cohort_name,priority,dynamic_tag_id 
from cohorts 
where is_active = 'true'
and id in (700,701,702,703,704,1123,1124,1125,1126)
) x 
join DYNAMIC_TAGgables dt on x.dynamic_tag_id = dt.dynamic_tag_id
)
qualify rnk = 1 
order by cohort_id
),
vs as (
select event_date,event_timestamp,retailer_id,SECTION_ID,s.name_ar as section,GA_SESSION_ID 
from MAXAB_EVENTS.VIEW_SECTION
join sections s on s.id = MAXAB_EVENTS.VIEW_SECTION.section_id 
where (seller_name in ('maxab','مكسب')
or flow_type = '')
and  event_timestamp::date between date_trunc('month',current_date - interval '18 month') and current_date-1
AND country LIKE '%Egypt%'
AND user_id LIKE '%EG_retailers_%'
),
vb as (
select 
event_date,
event_timestamp,
vb.retailer_id,
vb.brand_id,
vb.brand_name,
s.id as section_id ,
s.name_ar as section,
c.id as cat_id,
c.name_ar as cat,
GA_SESSION_ID 
FROM maxab_events.view_brand vb
join CATEGORIES c on c.id = vb.category_id 
join sections s on s.id = c.section_id
WHERE  event_timestamp::date between date_trunc('month',current_date - interval '18 month') and current_date-1
AND country LIKE '%Egypt%'
AND user_id LIKE '%EG_retailers_%'
and brand_id <> 'null'
),
catbrands as (
SELECT  DISTINCT
		brands.id as brand_id, 
		categories.id as cat_id





FROM product_sales_order pso
JOIN sales_orders so ON so.id = pso.sales_order_id
JOIN products on products.id=pso.product_id
JOIN brands on products.brand_id = brands.id 
JOIN categories ON products.category_id = categories.id
           

WHERE   True
    AND so.created_at ::date between date_trunc('month',current_date - interval '6 month') and date_trunc('month',current_date)-1
    AND so.sales_order_status_id not in (7,12)
    AND so.channel IN ('telesales','retailer')
    AND pso.purchased_item_count <> 0

),
sales as (

SELECT  DISTINCT
		so.created_at::date as date,
		so.retailer_id,
		brands.id as brand_id, 
		categories.id as cat_id,
		1 as purchased,
		sum(pso.total_price) as nmv,





FROM product_sales_order pso
JOIN sales_orders so ON so.id = pso.sales_order_id
JOIN products on products.id=pso.product_id
JOIN brands on products.brand_id = brands.id 
JOIN categories ON products.category_id = categories.id
           

WHERE   True
    AND so.created_at::date between date_trunc('month',current_date - interval '18 month') and current_date-1
    AND so.sales_order_status_id not in (7,12)
    AND so.channel IN ('telesales','retailer')
    AND pso.purchased_item_count <> 0
group by all 

),
Actual_purchased as (
select cohort_id,brand_id,cat_id,
PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY case when o_month <> date_trunc('month',current_date - interval '1 month') then  num_rets end)AS Actual_rets_q3,
coalesce(avg(case when o_month = date_trunc('month',current_date - interval '1 month') then num_rets end),0) as actual_rets_lm

from (
select *,
PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY case when o_month <> date_trunc('month',current_date - interval '1 month') then  num_rets end) OVER (partition by brand_id,cat_id,cohort_id) AS q1,
PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY case when o_month <> date_trunc('month',current_date - interval '1 month') then  num_rets end) OVER (partition by brand_id,cat_id,cohort_id) AS q3,
from (
select  date_trunc('month',date) as o_month,cohort_id,brand_id,cat_id,count(distinct sales.retailer_id) as num_rets
from sales 
join retailer_cohort rc on rc.retailer_id = sales.retailer_id
group by all 
)
)
where num_rets >= q1-(1.5*(q3-q1))
or o_month = date_trunc('month',current_date - interval '1 month')
group by all 
),
oos as (
select x.*,b.name_ar as brand ,c.name_ar as cat ,c.id as cat_id 
from (
select 
event_date,
event_timestamp,
oos.retailer_id,
oos.brand_id,
coalesce(oos.item_id,oos.product_id) as product_id,
GA_SESSION_ID,
1 as no_stocks
from maxab_events.out_of_stock oos
WHERE  event_timestamp::date between date_trunc('month',current_date - interval '18 month') and current_date-1
AND country LIKE '%Egypt%'
AND user_id LIKE '%EG_retailers_%'
and brand_id <> 'null'
and out_type = 'complete'
)x 
join products p on p.id = x.product_id 
join brands b on b.id = p.brand_id 
join categories c on c.id = p.category_id
),
historical_conv as (
select brand_id,brand_name,cat_id,cat,cohort_id, 
PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY total_viewed) as viewed_75,
PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY total_purchased) purchased_75,
PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY conversion_rate) conv_75,
avg(oos_rate) as oos_rate
from (
select *,total_purchased/total_viewed as conversion_rate,total_oos/total_viewed as oos_rate,
PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY conversion_rate) OVER (partition by brand_id,cat_id,cohort_id) AS q1,
PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY conversion_rate) OVER (partition by brand_id,cat_id,cohort_id) AS q3,
q3-q1 as iqr 

from (
select month,brand_id,brand_name,cat_id,cat,cohort_id,
count(distinct retailer_id) as total_viewed,
coalesce(count( distinct case when purchased is not null then retailer_id end),0) as total_purchased,
coalesce(count(distinct case when oos >=0.3 then retailer_id end),0) as total_oos
from (
select date_trunc('month',event_date) as month,
brand_id,brand_name,
cat_id,cat,cohort_id,
retailer_id,
max(purchased) as purchased,
avg(no_stocks) as oos
from(

select distinct vb.* ,rc.cohort_id,s.purchased,coalesce(oos.no_stocks,0) as no_stocks
from vs 
join vb on vs.section_id = vb.section_id 
		and vs.GA_SESSION_ID = vb.GA_SESSION_ID 
		and vs.retailer_id = vb.retailer_id
		and vb.event_timestamp >= vs.event_timestamp 
		and vb.event_date = vs.event_date
		
left join oos on oos.brand_id = vb.brand_id 
			and oos.GA_SESSION_ID = vb.GA_SESSION_ID 
			and oos.retailer_id = vb.retailer_id
			and oos.event_timestamp >= vs.event_timestamp 
			and oos.event_date = vs.event_date
			
join retailer_cohort rc on rc.retailer_id = vb.retailer_id
join catbrands cb  on cb.brand_id = vb.brand_id and cb.cat_id = vb.cat_id
left join sales s on s.retailer_id = vs.retailer_id and s.brand_id = vb.brand_id and vb.cat_id = s.cat_id and s.date >= vb.event_date and s.date <= vb.event_date + 2

where vb.event_date::date <  date_trunc('month',current_date - interval '1 month')
and vs.event_date::date <  date_trunc('month',current_date - interval '1 month')
)
group by all
)
group by all 
)
)
where conversion_rate >= q1-(1.5*iqr)
group by all 
),
current_conv as (

select cohort_id,brand_id,brand_name,cat_id,cat,
total_viewed as lm_views,
total_purchased as lm_purchases,
total_purchased/total_viewed as lm_conversion_rate,
total_oos/total_viewed as lm_oos
from (
select month,brand_id,brand_name,cat_id,cat,cohort_id,
count(distinct retailer_id) as total_viewed,
coalesce(count( distinct case when purchased is not null then retailer_id end),0) as total_purchased,
coalesce(count(distinct case when oos >=0.3 then retailer_id end),0) as total_oos
from (
select date_trunc('month',event_date) as month,
brand_id,brand_name,
cat_id,cat,cohort_id,
retailer_id,
max(purchased) as purchased,
avg(no_stocks) as oos
from(
select distinct vb.* ,rc.cohort_id,s.purchased,coalesce(oos.no_stocks,0) as no_stocks
from vs 
join vb on vs.section_id = vb.section_id 
		and vs.GA_SESSION_ID = vb.GA_SESSION_ID 
		and vs.retailer_id = vb.retailer_id
		and vb.event_timestamp >= vs.event_timestamp 
		and vb.event_date = vs.event_date
left join oos on oos.brand_id = vb.brand_id 
			and oos.GA_SESSION_ID = vb.GA_SESSION_ID 
			and oos.retailer_id = vb.retailer_id
			and oos.event_timestamp >= vs.event_timestamp 
			and oos.event_date = vs.event_date
					
join retailer_cohort rc on rc.retailer_id = vb.retailer_id
join catbrands cb  on cb.brand_id = vb.brand_id and cb.cat_id = vb.cat_id
left join sales s on s.retailer_id = vs.retailer_id and s.brand_id = vb.brand_id and vb.cat_id = s.cat_id and s.date >= vb.event_date and s.date <= vb.event_date+2

where vb.event_date::date between  date_trunc('month',current_date - interval '1 month')  and date_trunc('month',current_date)-1
and vs.event_date::date between  date_trunc('month',current_date - interval '1 month')  and date_trunc('month',current_date)-1
)
group by all
)
group by all 
)
),
avg_ret_nmv as (
select cohort_id,brand_id,cat_id,avg(nmv) as avg_nmv
from (
select date_trunc('month',date) as date,brand_id,cat_id,sales.retailer_id,cohort_id,sum(nmv) as nmv
from sales 
join retailer_cohort rc on rc.retailer_id = sales.retailer_id
where date between date_trunc('month',current_date - interval '4 month') and date_trunc('month',current_date)-1
group by all 
)
group by all 
),
final as (
select a.*,
case when views_growth between -0.05 and 0.05 and conv_growth between -0.05 and 0.05 then 'Stable'
when views_growth between -0.05 and 0.05 and conv_growth < -0.05 then 'Price or stocks'
when views_growth between -0.05 and 0.05 and conv_growth >  0.05 then 'Growing'

when views_growth < -0.05  and conv_growth between -0.05 and 0.05 then 'App retailers Droping' 
when views_growth < -0.05  and conv_growth < -0.05 then 'Price-stocks-appRetention'
when views_growth < -0.05  and conv_growth > 0.05 then 'Needs more views'

when views_growth > 0.05  and conv_growth between -0.05 and 0.05 then 'stable' 
when views_growth > 0.05  and conv_growth < -0.05 then 'Price or stocks'
when views_growth > 0.05  and conv_growth > 0.05 then 'Spike'
end as status,
arn.avg_nmv,
case when status like '%Price%'then ceil(((1+(conv_growth/2))*conv_75)*lm_views) else lm_purchases end  as new_rets_to_purchase,
new_rets_to_purchase*coalesce(arn.avg_nmv,0) as target_nmv

from (
select *,
case when viewed_75 <>0 then  (lm_views - viewed_75)/viewed_75 else 0 end as views_growth,
case when purchased_75 <> 0 then (lm_purchases - purchased_75)/purchased_75 else 0 end  as purchases_growth,
case when conv_75 <> 0 then (lm_conversion_rate - conv_75)/conv_75 else 0 end as conv_growth
from (
select cohort_id,brand_id,brand_name,cat_id,cat,
case when purchased_75 <>0 then least(greatest((Actual_rets_q3/purchased_75),1),4)*viewed_75 else viewed_75 end as viewed_75,
Actual_rets_q3 as purchased_75,
case when viewed_75 > 0 then purchased_75/viewed_75 else 0 end as conv_75,
oos_rate,
case when lm_purchases <> 0 then least(greatest((actual_rets_lm/lm_purchases),1),4)*lm_views else lm_views end as lm_views,
actual_rets_lm as lm_purchases,
case when lm_views <> 0 then lm_purchases/lm_views else 0 end as lm_conversion_rate,
lm_oos
from(
select hc.*,
coalesce(Actual_rets_q3,0) as Actual_rets_q3,
coalesce(Actual_rets_lm,0) as Actual_rets_lm ,

coalesce(lm_views,0) as lm_views,
coalesce(lm_purchases,0) as lm_purchases ,
coalesce(lm_conversion_rate,0) as lm_conversion_rate,
coalesce(lm_oos,0) as lm_oos

from historical_conv hc 
left join Actual_purchased ap on hc.cohort_id = ap.cohort_id 
						 and hc.brand_id = ap.brand_id 
						 and hc.cat_id = ap.cat_id 
						  
left join current_conv cc on hc.cohort_id = cc.cohort_id 
						  and hc.brand_id = cc.brand_id 
						  and hc.cat_id = cc.cat_id
where conv_75 > 0 						  
)
where conv_75 > 0 
)

)a 
left join avg_ret_nmv arn on arn.cat_id = a.cat_id and arn.brand_id = a.brand_id and a.cohort_id = arn.cohort_id
),
current_month as (
select brand_id,cat_id,cohort_id,
count(distinct retailer_id) as cm_viewed,
coalesce(count(distinct case when purchased is not null then retailer_id end),0) as cm_purchased,
coalesce(count(distinct case when oos >=0.3 then retailer_id end),0) as cm_oos,
cm_purchased/cm_viewed as cm_conv,
cm_oos/cm_viewed as oos_perc
from (
select date_trunc('month',event_date) as month,
brand_id,
cat_id,cohort_id,
retailer_id,
max(purchased) as purchased,
avg(no_stocks) as oos
from(
select distinct vb.* ,rc.cohort_id,s.purchased,coalesce(oos.no_stocks,0) as no_stocks,
max(date_part('hour',vb.event_timestamp))over(partition by vb.event_date) as max_hour
from vs 
join vb on vs.section_id = vb.section_id 
		and vs.GA_SESSION_ID = vb.GA_SESSION_ID 
		and vs.retailer_id = vb.retailer_id
		and vb.event_timestamp >= vs.event_timestamp 
		and vb.event_date = vs.event_date
left join oos on oos.brand_id = vb.brand_id 
			and oos.GA_SESSION_ID = vb.GA_SESSION_ID 
			and oos.retailer_id = vb.retailer_id
			and oos.event_timestamp >= vs.event_timestamp 
			and oos.event_date = vs.event_date
			
join retailer_cohort rc on rc.retailer_id = vb.retailer_id
join catbrands cb  on cb.brand_id = vb.brand_id and cb.cat_id = vb.cat_id
left join sales s on s.retailer_id = vs.retailer_id and s.brand_id = vb.brand_id and vb.cat_id = s.cat_id and s.date >= vb.event_date and s.date <= vb.event_date+2

where vb.event_date::date >= date_trunc('month',current_date)
and vs.event_date::date >= date_trunc('month',current_date)
qualify max_hour = 23
)
group by all 
)
 
group by all 
) 

select f.*,cm_viewed,cm_purchased, cm_conv,oos_perc as cm_oos_perc
from final f 
left join current_month cm on f.cohort_id = cm.cohort_id and cm.brand_id = f.brand_id and cm.cat_id = f.cat_id
order by target_nmv desc
'''

In [None]:
query = f'''
with selected_prods as (
select * 
from(
VALUES
{selected_skus_tuple}
)x(product_id,warehouse_id)
),
selected_districts as (
select distinct sp.warehouse_id , sp.product_id,dp.district_id ,c.name_ar as cat ,b.name_ar as brand
from WAREHOUSE_DISPATCHING_RULES wdr 
join DISPATCHING_POLYGONS dp on dp.id = wdr.DISPATCHING_POLYGON_ID
join selected_prods sp on sp.product_id = wdr.product_id and wdr.warehouse_id = sp.warehouse_id
join products p on p.id = sp.product_id
join brands b on b.id = p.brand_id 
join categories c on c.id = p.category_id 
),

buy_cat as (
SELECT  DISTINCT
sd.district_id,
sd.warehouse_id as warehouse_id,
so.retailer_id as retailer_id,
c.name_ar as cat,
sd.product_id


FROM product_sales_order pso
JOIN sales_orders so ON so.id = pso.sales_order_id
JOIN materialized_views.retailer_polygon on materialized_views.retailer_polygon.retailer_id=so.retailer_id
JOIN districts on districts.id=materialized_views.retailer_polygon.district_id
join products p on p.id = pso.product_id
join brands b on b.id = p.brand_id 
join categories c on c.id = p.category_id 
join selected_districts sd on sd.cat = c.name_ar and sd.district_id = districts.id and b.name_ar = sd.brand
           

WHERE   True
    AND so.created_at::date >= current_date - 60
    AND so.sales_order_status_id not in (7,12)
    AND so.channel IN ('telesales','retailer')
    AND pso.purchased_item_count <> 0
	

),
buy_product as (
SELECT  DISTINCT
sd.district_id,
sd.warehouse_id as warehouse_id,
so.retailer_id as retailer_id,
pso.product_id,
c.name_ar as cat


FROM product_sales_order pso
JOIN sales_orders so ON so.id = pso.sales_order_id
JOIN materialized_views.retailer_polygon on materialized_views.retailer_polygon.retailer_id=so.retailer_id
JOIN districts on districts.id=materialized_views.retailer_polygon.district_id
join products p on p.id = pso.product_id
join brands b on b.id = p.brand_id 
join categories c on c.id = p.category_id 
join selected_districts sd on sd.product_id = pso.product_id and sd.district_id = districts.id
           

WHERE   True
    AND so.created_at::date >= current_date - 60
    AND so.sales_order_status_id not in (7,12)
    AND so.channel IN ('telesales','retailer')
    AND pso.purchased_item_count <> 0

)

select distinct bc.retailer_id,bc.product_id,bc.warehouse_id
from buy_cat bc 
left join buy_product bp  on bc.retailer_id = bp.retailer_id and bc.cat = bp.cat and bc.product_id = bp.product_id
where bp.product_id is null 
'''
cat_not_product =  query_snowflake(query, columns = ['retailer_id','product_id','warehouse_id'])
cat_not_product.columns = cat_not_product.columns.str.lower()
for col in cat_not_product.columns:
    cat_not_product[col] = pd.to_numeric(cat_not_product[col], errors='ignore') 
cat_not_product    

In [None]:
query = '''
select retailer_id
from (
SELECT  DISTINCT
retailer_id,
sales_order_status_id,
created_at::date as o_date ,
max(o_date)over(partition by retailer_id) as last_order
from sales_orders so 
WHERE  so.created_at ::date >= current_date - 120
AND so.sales_order_status_id not in (7,12)
AND so.channel IN ('telesales','retailer')
qualify o_date = last_order
)
where sales_order_status_id <> 6 

union all 

select id as retailer_id 
from retailers 
where activation = 'false'
'''
exec_rets =  query_snowflake(query, columns = ['retailer_id'])
exec_rets.columns = exec_rets.columns.str.lower()
for col in exec_rets.columns:
    exec_rets[col] = pd.to_numeric(exec_rets[col], errors='ignore') 
exec_rets =  exec_rets.retailer_id.unique() 

In [None]:
all_retailers  = pd.concat([cat_not_product, churned_dropped]).drop_duplicates().reset_index(drop=True)
all_retailers = all_retailers[~all_retailers['retailer_id'].isin(exec_rets)]

In [None]:
final_df = product_final_df.merge(all_retailers,on=['warehouse_id','product_id'])
final_df['SKU_arr'] = final_df.apply(lambda x: [x['product_id'], 1, x['discount']], axis=1)
final_df