In [1]:
%%capture

# Upgrade pip
!pip install --upgrade pip
# Connectivity
!pip install psycopg2-binary  # PostgreSQL adapter
# !pip install snowflake-connector-python  # Snowflake connector
!pip install snowflake-connector-python==3.15.0 # Snowflake connector Older Version
!pip install snowflake-sqlalchemy  # Snowflake SQLAlchemy connector
!pip install warnings # Warnings management
# !pip install pyarrow # Serialization
!pip install keyring==23.11.0 # Key management
!pip install sqlalchemy==1.4.46 # SQLAlchemy
!pip install requests # HTTP requests
!pip install boto3 # AWS SDK
# !pip install slackclient # Slack API
!pip install oauth2client # Google Sheets API
!pip install gspread==5.9.0 # Google Sheets API
!pip install gspread_dataframe # Google Sheets API
!pip install google.cloud # Google Cloud
# Data manipulation and analysis
!pip install polars
!pip install pandas==2.2.1
!pip install numpy
# !pip install fastparquet
!pip install openpyxl # Excel file handling
!pip install xlsxwriter # Excel file handling
# Linear programming
!pip install pulp
# Date and time handling
!pip install --upgrade datetime
!pip install python-time
!pip install --upgrade pytz
# Progress bar
!pip install tqdm
# Database data types
!pip install db-dtypes
# Geospatial data handling
# !pip install geopandas
# !pip install shapely
# !pip install fiona
# !pip install haversine
# Plotting

# Modeling
!pip install statsmodels
!pip install scikit-learn

!pip install import-ipynb

In [2]:
import pandas as pd
import numpy as np
from tqdm import tqdm
from datetime import datetime
import calendar
import json
from datetime import date, timedelta
from oauth2client.service_account import ServiceAccountCredentials
import setup_environment_2
import importlib
import import_ipynb
import warnings
import demand_sku_cntrb
warnings.filterwarnings("ignore")
importlib.reload(setup_environment_2)
setup_environment_2.initialize_env()

  from pandas.core.computation.check import NUMEXPR_INSTALLED


/home/ec2-user/.Renviron
/home/ec2-user/service_account_key.json
/home/ec2-user/.Renviron
/home/ec2-user/service_account_key.json


In [3]:
pd.set_option('display.max_columns', None)

In [4]:
today = datetime.today()- timedelta(days=4)
month_start = today.replace(day=1)
first_part = (today - month_start).days

last_day = calendar.monthrange(today.year, today.month)[1]
second_part = (last_day - today.day)+1

In [5]:
def query_snowflake(query, columns=[]):
    import os
    import snowflake.connector
    import numpy as np
    import pandas as pd
    con = snowflake.connector.connect(
        user =  os.environ["SNOWFLAKE_USERNAME"],
        account= os.environ["SNOWFLAKE_ACCOUNT"],
        password= os.environ["SNOWFLAKE_PASSWORD"],
        database =os.environ["SNOWFLAKE_DATABASE"]
    )
    try:
        cur = con.cursor()
        cur.execute("USE WAREHOUSE COMPUTE_WH")
        cur.execute(query)
        if len(columns) == 0:
            out = pd.DataFrame(np.array(cur.fetchall()))
        else:
            out = pd.DataFrame(np.array(cur.fetchall()),columns=columns)
        return out
    except Exception as e:
        print("Error: ", e)
    finally:
        cur.close()
        con.close()

In [6]:
command_string = '''
WITH
  whs AS (
    SELECT
      *
    FROM
      (
        VALUES
          ('Cairo', 'El-Marg', 38),
          ('Cairo', 'Mostorod', 1),
          ('Giza', 'Barageel', 236),
          ('Giza', 'Basatin', 39),
          ('Delta West', 'El-Mahala', 337),
          ('Delta West', 'Tanta', 8),
          ('Delta East', 'Mansoura FC', 339),
          ('Delta East', 'Sharqya', 170),
          ('Upper Egypt', 'Assiut FC', 501),
          ('Upper Egypt', 'Bani sweif', 401),
          ('Upper Egypt', 'Menya Samalot', 703),
          ('Upper Egypt', 'Sohag', 632),
          ('Alexandria', 'Khorshed Alex', 797)
      ) x (region, wh, warehouse_id)
  ),
  active_skus AS (
    SELECT
      CASE
        WHEN regions.name_en = 'Greater Cairo' THEN cities.name_en
        ELSE regions.name_en
      END AS region,
      pso.product_id AS product_id,
      sum(pso.total_price) AS nmv
    FROM
      sales_orders so
      JOIN product_sales_order pso ON so.id = pso.sales_order_id
      JOIN materialized_views.retailer_polygon ON materialized_views.retailer_polygon.retailer_id = so.retailer_id
      JOIN districts ON districts.id = materialized_views.retailer_polygon.district_id
      JOIN cities ON cities.id = districts.city_id
      JOIN states ON states.id = cities.state_id
      JOIN regions ON regions.id = states.region_id
    WHERE
      so.created_at::DATE >= DATE_TRUNC('month', CURRENT_DATE - INTERVAL '3 month')
      AND sales_order_status_id NOT IN (7, 12)
      AND so.channel IN ('telesales', 'retailer')
      AND pso.PURCHASED_ITEM_COUNT > 0
    GROUP BY
      ALL
    HAVING
      nmv > 0
  ),
  -- cat_target as (
  -- ),
  weighted_fill_rate AS (
    SELECT
      DAY,
      product_id,
      region,
      1 - (oos / total) AS fill_rate
    FROM
      (
        SELECT
          timestamp::DATE AS DAY,
          whs.region,
          categories.name_ar AS cat,
          brands.name_ar AS brand,
          products.id AS product_id,
          nmv,
          count(
            CASE
              WHEN ss.activation = FALSE
              OR ss.available_stock = 0 THEN timestamp
            END
          ) AS oos,
          count(timestamp) AS total
        FROM
          materialized_views.STOCK_SNAP_SHOTS_RECENT ss
          JOIN whs ON whs.warehouse_id = ss.warehouse_id
          JOIN products ON products.id = ss.product_id
          JOIN product_units ON product_units.id = products.unit_id
          JOIN categories ON categories.id = products.category_id
          JOIN brands ON brands.id = products.brand_id
          JOIN active_skus ON active_skus.region = whs.region
          AND active_skus.product_id = ss.product_id
        WHERE
          ss.timestamp::DATE >= DATE_TRUNC('month', CURRENT_DATE)
        GROUP BY
          ALL
      )
  ),
  target_margin AS (
    SELECT DISTINCT
      cat,
      margin AS target_bm
    FROM
      performance.commercial_targets cplan
    QUALIFY
      CASE
        WHEN DATE_TRUNC('month', MAX(DATE) OVER ()) = DATE_TRUNC('month', CURRENT_DATE) THEN DATE_TRUNC('month', CURRENT_DATE)
        ELSE DATE_TRUNC('month', CURRENT_DATE - INTERVAL '1 month')
      END = DATE_TRUNC('month', DATE)
  ),
  GC_NMV AS (
    SELECT
      CASE
        WHEN regions.name_en LIKE '%Delta%' THEN 'Delta'
        ELSE regions.name_en
      END AS region,
      CASE
        WHEN regions.id = 2 THEN cities.name_en
        ELSE regions.name_en
      END AS region_city,
      categories.name_ar AS cat,
      b.name_ar AS brand,
      sum(pso.total_price) AS nmv,
      CASE
        WHEN sum(pso.total_price) <> 0 THEN sum(pso.total_price) / sum(nmv) over (
          PARTITION BY
            region,
            cat,
            brand
        )
        ELSE 0
      END AS cntrb
    FROM
      sales_orders so
      JOIN product_sales_order pso ON so.id = pso.sales_order_id
      JOIN products ON products.id = pso.product_id
      JOIN brands b ON b.id = products.brand_id
      JOIN categories ON categories.id = products.category_id
      JOIN materialized_views.retailer_polygon ON materialized_views.retailer_polygon.retailer_id = so.retailer_id
      JOIN districts ON districts.id = materialized_views.retailer_polygon.district_id
      JOIN cities ON cities.id = districts.city_id
      JOIN states ON states.id = cities.state_id
      JOIN regions ON regions.id = states.region_id
    WHERE
      date_trunc(MONTH, so.created_at::DATE) BETWEEN date_trunc(MONTH, CURRENT_DATE) - interval '3 month' AND date_trunc(MONTH, CURRENT_DATE)  -1
      AND channel <> 'admin'
      AND sales_order_status_id NOT IN (7, 12)
    GROUP BY
      ALL
    ORDER BY
      cat,
      brand
  ),
  comm_plan AS (
    SELECT
      DATE,
      city AS region,
      cat,
      brand,
      sum(targets.nmv) AS target,
    FROM
      performance.commercial_targets targets
    WHERE
      targets.date >= date_trunc('month', CURRENT_DATE)
      AND targets.date <= CURRENT_DATE -1
    GROUP BY
      ALL
  ),
  c_t_cntrb AS (
    SELECT
      CASE
        WHEN regions.id = 2 THEN cities.name_en
        ELSE regions.name_en
      END AS region,
      pso.product_id,
      categories.name_ar AS cat,
      b.name_ar AS brand,
      sum(pso.total_price) AS nmv,
      sum(pso.total_price) / sum(nmv) over (
        PARTITION BY
          region,
          cat,
          brand
      ) AS CNTRB
    FROM
      product_sales_order pso
      JOIN sales_orders so ON so.id = pso.sales_order_id
      JOIN products ON products.id = pso.product_id
      JOIN materialized_views.retailer_polygon rp ON rp.retailer_id = so.retailer_id
      JOIN districts d ON d.id = rp.district_id
      JOIN cities ON cities.id = d.city_id
      JOIN categories ON categories.id = products.category_id
      JOIN brands b ON b.id = products.brand_id
      JOIN sections ON sections.id = categories.section_id
      JOIN states ON states.id = cities.state_id
      JOIN regions ON regions.id = states.region_id
    WHERE
      so.created_at::DATE BETWEEN date_trunc('month', CURRENT_DATE - interval '3 month') AND date_trunc('month', CURRENT_DATE)  -1
      AND so.sales_order_status_id NOT IN (7, 12)
      AND pso.purchased_item_count <> 0
      AND so.channel IN ('retailer', 'telesales')
      AND products.id NOT IN (
        SELECT
          product_id
        FROM
          materialized_views.PL_SKUS_FORMATs
        WHERE
          PL_COMPETITION = 'PL'
      )
    GROUP BY
      ALL
  ),
  cat_brand_target AS (
    SELECT
      cp.*,
      region_city,
      cntrb,
      target * cntrb AS brand_cat_target
    FROM
      comm_plan cp
      JOIN GC_NMV gc ON cp.cat = gc.cat
      AND cp.brand = gc.brand
      AND cp.region = gc.region
  ),
  skus_targets AS (
    SELECT
      DATE,
      region,
      product_id,
      cat,
      brand,
      brand_cat_target * cntrb AS sku_target
    FROM
      (
        SELECT
          c.date,
          p.*,
          brand_cat_target,
          sum(p.cntrb) over (
            PARTITION BY
              p.cat,
              p.brand,
              p.region,
              DATE
          ) AS all_cntrb
        FROM
          c_t_cntrb p
          JOIN cat_brand_target c ON c.region_city = p.region
          AND c.cat = p.cat
          AND c.brand = p.brand
      )
    ORDER BY
      1
  ),
  prs_in AS (
    WITH
      prs AS (
        SELECT DISTINCT
          purchased_receipts.date::DATE AS pr_date,
          max(date_part('hour', purchased_receipts.date)) AS HOUR,
          products.id AS product_id,
          CONCAT(
            products.name_ar,
            ' ',
            products.size,
            ' ',
            product_units.name_ar
          ) AS sku,
          brands.name_ar AS Brand,
          categories.name_ar AS category,
          purchased_receipts.warehouse_id AS warehouse_id,
          warehouses.name AS warehouse,
          sum(
            product_purchased_receipts.purchased_item_count * product_purchased_receipts.basic_unit_count
          ) AS purchase_min_count,
        FROM
          product_purchased_receipts
          LEFT JOIN products ON products.id = product_purchased_receipts.product_id
          LEFT JOIN packing_unit_products ON packing_unit_products.product_id = products.id
          LEFT JOIN purchased_receipts ON purchased_receipts.id = product_purchased_receipts.purchased_receipt_id
          LEFT JOIN purchased_receipt_statuses ON purchased_receipt_statuses.id = purchased_receipts.purchased_receipt_status_id
          LEFT JOIN packing_units ON packing_units.id = product_purchased_receipts.packing_unit_id
          LEFT JOIN product_units ON products.unit_id = product_units.id
          LEFT JOIN suppliers ON suppliers.id = purchased_receipts.supplier_id
          LEFT JOIN brands ON brands.id = products.brand_id
          LEFT JOIN categories ON categories.id = products.category_id
          LEFT JOIN warehouses ON warehouses.id = purchased_receipts.warehouse_id
        WHERE
          product_purchased_receipts.purchased_item_count <> 0
          AND purchased_receipts.purchased_receipt_status_id IN (4, 5, 7)
          AND purchased_receipts.date::DATE >= date_trunc('month', CURRENT_DATE)
          --  AND purchased_receipts.is_actual = 'true'
        GROUP BY
          ALL
      ),
      sales_cntrb AS (
        SELECT
          date_part('hour', so.created_at) AS HOUR,
          pso.warehouse_id AS warehouse_id,
          sum(pso.total_price) AS nmv
        FROM
          sales_orders so
          JOIN product_sales_order pso ON so.id = pso.sales_order_id
        WHERE
          so.created_at::DATE BETWEEN CURRENT_DATE -30 AND CURRENT_DATE  -1
          AND sales_order_status_id NOT IN (7, 12)
          AND pso.purchased_item_count <> 0
          AND channel <> 'admin'
        GROUP BY
          ALL
      ),
      whs AS (
        SELECT
          *
        FROM
          (
            VALUES
              ('Cairo', 'El-Marg', 38),
              ('Cairo', 'Mostorod', 1),
              ('Giza', 'Barageel', 236),
              ('Giza', 'Basatin', 39),
              ('Delta West', 'El-Mahala', 337),
              ('Delta West', 'Tanta', 8),
              ('Delta East', 'Mansoura FC', 339),
              ('Delta East', 'Sharqya', 170),
              ('Upper Egypt', 'Assiut FC', 501),
              ('Upper Egypt', 'Bani sweif', 401),
              ('Upper Egypt', 'Menya Samalot', 703),
              ('Upper Egypt', 'Sohag', 632),
              ('Alexandria', 'Khorshed Alex', 797)
          ) x (region, wh, warehouse_id)
      )
    SELECT
      pr_date,
      product_id,
      sku,
      region,
      round(sum((1 - cntrb) * purchase_min_count), 0) AS in_stocks
    FROM
      (
        SELECT
          x.pr_date,
          x.hour,
          x.product_id,
          x.sku,
          x.warehouse_id,
          x.purchase_min_count,
          x.cntrb,
          region
        FROM
          (
            SELECT
              prs.*,
              sum(
                CASE
                  WHEN sc.hour < prs.hour THEN nmv
                END
              ) / sum(nmv) AS cntrb
            FROM
              prs
              LEFT JOIN sales_cntrb sc ON prs.warehouse_id = sc.warehouse_id
            GROUP BY
              ALL
          ) x
          JOIN whs ON whs.warehouse_id = x.warehouse_id
      )
    GROUP BY
      ALL
  ),
brands_cat_ability as (  
select date,region,cat,brand,sum(max_nmv) as max_nmv , sum(sku_target) as target,
      CASE
        WHEN sum(max_nmv) / target > 1 THEN 1
        ELSE sum(max_nmv) / target
      END AS ability_ach
from (
    SELECT
      y.*,
      sku_target,
      CASE
        WHEN max_nmv / sku_target > 1 THEN 1
        ELSE max_nmv / sku_target
      END AS ach,
      sku_target / sum(sku_target) over (
        PARTITION BY
          y.product_id,
          y.region
      ) AS cntrb
    FROM
      (
        SELECT
          DATE,
          product_id,
          cat,
          brand,
          region,
          CASE
            WHEN fill_rate > 0 THEN day_stocks * price
            ELSE 0
          END AS max_nmv,
          day_stocks,
          fill_rate
        FROM
          (
            SELECT
              *,
              coalesce(
                CASE
                  WHEN opening_stocks = 0
                  AND closing_stocks > 0 THEN fill_rate * closing_stocks
                  WHEN closing_stocks > opening_stocks
                  AND in_stocks IS NOT NULL THEN opening_stocks + in_stocks
                  ELSE opening_stocks
                END,
                0
              ) AS day_stocks,
              wac_p / (1 - target_bm) AS price,
            FROM
              (
                SELECT
                  x.*,
                  coalesce(wfr.fill_rate, 0) AS fill_rate,
                  wac_p,
                  target_bm,
                  pin.in_stocks
                FROM
                  (
                    SELECT
                      *,
                      lag(closing_stocks) over (
                        PARTITION BY
                          product_id,
                          region
                        ORDER BY
                          DATE
                      ) AS opening_stocks
                    FROM
                      (
                        SELECT
                          sdc.timestamp::DATE AS DATE,
                          sdc.product_id,
                          c.name_ar AS cat,
                          b.name_ar AS brand,
                          whs.region,
                          sum(sdc.AVAILABLE_STOCK) AS closing_stocks
                        FROM
                          MATERIALIZED_VIEWS.STOCK_DAY_CLOSE sdc
                          JOIN whs ON whs.warehouse_id = sdc.warehouse_id
                          JOIN products p ON p.id = sdc.product_id
                          JOIN brands b ON b.id = p.BRAND_ID
                          JOIN categories c ON c.id = p.category_id
                        WHERE
                          timestamp::DATE >= date_trunc('month', CURRENT_DATE) -1
                        GROUP BY
                          ALL
                      )
                    ORDER BY
                      DATE
                  ) x
                  LEFT JOIN weighted_fill_rate wfr ON wfr.product_id = x.product_id
                  AND x.date = wfr.day
                  AND x.region = wfr.region
                  LEFT JOIN finance.all_cogs f ON f.product_id = x.product_id
                  AND f.from_date::DATE <= x.date
                  AND f.to_date::DATE > x.date
                  LEFT JOIN target_margin tm ON tm.cat = x.cat
                  LEFT JOIN prs_in pin ON pin.product_id = x.product_id
                  AND pin.region = x.region
                  AND pin.pr_date = x.date
              )
          ) z
      ) y
      JOIN skus_targets t ON t.cat = y.cat
      AND t.brand = y.brand
      AND y.date = t.date
      AND y.product_id = t.product_id
      AND y.region = t.region
)
group by all 
),
sales as (
SELECT  DISTINCT
		so.created_at::date as date,
		case when regions.id = 2 then cities.name_en else regions.name_en end as region,
		brands.name_ar as brand,
		categories.name_ar as cat,
		sum(pso.total_price) as total_nmv
		
FROM product_sales_order pso
JOIN sales_orders so ON so.id = pso.sales_order_id
JOIN products on products.id=pso.product_id
JOIN brands on products.brand_id = brands.id
JOIN categories ON products.category_id = categories.id 

JOIN materialized_views.retailer_polygon on materialized_views.retailer_polygon.retailer_id=so.retailer_id
JOIN districts on districts.id=materialized_views.retailer_polygon.district_id
JOIN cities on cities.id=districts.city_id
join states on states.id=cities.state_id
join regions on regions.id=states.region_id

JOIN    finance.all_cogs f ON f.product_id = pso.product_id
                    AND f.from_date::date <= so.created_at::date
                    AND f.to_date::date > so.created_at::date

WHERE   
     so.created_at::date >= date_trunc('month',CURRENT_DATE)
    AND so.sales_order_status_id not in (7,12)
    AND so.channel IN ('telesales','retailer')
    AND pso.purchased_item_count <> 0

GROUP BY ALL


) 
select region,cat,brand,sum(Pricing_ach*all_cntrb) as mtd_ach,sum(three_days_cntrb*Pricing_ach) as last_three_ach
from (
select *, case when date>= CURRENT_date - 3 then target/last_days else 0 end as three_days_cntrb
from (
select *, target/sum(target)over(PARTITION by cat,brand,region) as all_cntrb,sum(case when date >= CURRENT_DATE - 3 then target end)over(PARTITION by cat,brand,region) as last_days
from (
select * ,
case when actual_ach > ability_ach then 1 
when actual_ach = 0 and ability_ach = 0 then 1 
else actual_ach/ability_ach end as Pricing_ach
from (
select bca.*,total_nmv,case when coalesce(total_nmv,0)/target > 1 then 1 else coalesce(total_nmv,0)/target end as actual_ach 
from brands_cat_ability bca 
left join sales s on s.date = bca.date and s.region = bca.region and s.cat = bca.cat and s.brand = bca.brand
)
)
)
)
group by all 
'''
cat_brand_perf= query_snowflake(command_string, columns = ['region','cat','brand','mtd_ach','last_3_days_ach'])
cat_brand_perf.last_3_days_ach = pd.to_numeric(cat_brand_perf.last_3_days_ach)
cat_brand_perf.mtd_ach = pd.to_numeric(cat_brand_perf.mtd_ach)
cat_brand_perf.last_3_days_ach = round(cat_brand_perf.last_3_days_ach,2)
cat_brand_perf.mtd_ach = round(cat_brand_perf.mtd_ach,2)

In [7]:
command_string = '''
WITH
  region_split AS (
    SELECT
      CASE
        WHEN regions.name_en LIKE '%Delta%' THEN 'Delta'
        ELSE regions.name_en
      END AS region,
      CASE
        WHEN regions.id = 2 THEN cities.name_en
        ELSE regions.name_en
      END AS region_city,
      categories.name_ar AS cat,
      b.name_ar AS brand,
      sum(pso.total_price) AS nmv,
      CASE
        WHEN sum(pso.total_price) <> 0 THEN sum(pso.total_price) / sum(nmv) over (
          PARTITION BY
            region,
            cat,
            brand
        )
        ELSE 0
      END AS cntrb
    FROM
      sales_orders so
      JOIN product_sales_order pso ON so.id = pso.sales_order_id
      JOIN products ON products.id = pso.product_id
      JOIN brands b ON b.id = products.brand_id
      JOIN categories ON categories.id = products.category_id
      JOIN materialized_views.retailer_polygon ON materialized_views.retailer_polygon.retailer_id = so.retailer_id
      JOIN districts ON districts.id = materialized_views.retailer_polygon.district_id
      JOIN cities ON cities.id = districts.city_id
      JOIN states ON states.id = cities.state_id
      JOIN regions ON regions.id = states.region_id
    WHERE
      date_trunc(MONTH, so.created_at::DATE) BETWEEN date_trunc(MONTH, CURRENT_DATE - interval '3 month') AND  CURRENT_DATE  -1
      AND channel <> 'admin'
      AND sales_order_status_id NOT IN (7, 12)
    GROUP BY
      ALL
  ),
  targets as (
    SELECT
      case when city = 'Alex' then 'Alexandria' else city end  AS region,
      cat,
      brand,
      sum(targets.nmv) AS target,
      avg(margin) as target_margin
    FROM
      performance.commercial_targets targets
    WHERE targets.date = CURRENT_DATE
    GROUP BY
      ALL
)
select region,cat,brand,target*cntrb as target,target_margin
from (
select rs.region_city as region,t.cat as cat,t.brand as brand,target,cntrb,target_margin
from targets t
left join region_split rs on t.brand = rs.brand and t.cat = rs.cat and t.region = rs.region
)
'''
day_target = query_snowflake(command_string, columns = ['region','cat','brand','todays_target','target_margin'])
day_target.todays_target = pd.to_numeric(day_target.todays_target)
day_target.target_margin = pd.to_numeric(day_target.target_margin)

In [8]:
Skus_cntrb = pd.read_excel('Skus_cntrb.xlsx').drop('Unnamed: 0', axis=1)

In [9]:
command_string = '''
WITH whs as (SELECT *
             FROM   (values
                            ('Cairo', 'El-Marg', 38),
                            ('Cairo', 'Mostorod', 1),
                            ('Giza', 'Barageel', 236),
                            ('Giza', 'Basatin', 39),
                            ('Delta West', 'El-Mahala', 337),
                            ('Delta West', 'Tanta', 8),
                            ('Delta East', 'Mansoura FC', 339),
                            ('Delta East', 'Sharqya', 170),
                            ('Upper Egypt', 'Assiut FC', 501),
                            ('Upper Egypt', 'Bani sweif', 401),
                            ('Upper Egypt', 'Menya Samalot', 703),
                            ('Upper Egypt', 'Sohag', 632),
                            ('Alexandria', 'Khorshed Alex', 797))
                    x(region, wh, warehouse_id)),
stocks as (
SELECT  region,
        product_id,
        SUM(stocks) as stocks
FROM    (SELECT DISTINCT whs.region,
                whs.wh,
                product_warehouse.product_id,
                (product_warehouse.available_stock)::integer as stocks,
        from whs
        JOIN product_warehouse ON product_warehouse.warehouse_id = whs.warehouse_id
        JOIN products on product_warehouse.product_id = products.id
        JOIN product_units ON products.unit_id = product_units.id
        where   product_warehouse.warehouse_id not in (6,9,10)
            AND product_warehouse.is_basic_unit = 1)
GROUP BY 1,2),

tool_rr as (
            SELECT
              product_id,
              warehouse_id,
              MAX(running_rate) AS rr_minunit
            FROM
              retool.po_initial_parameters
            WHERE
              created_at::DATE = (
                SELECT
                  MAX(created_at::DATE)
                FROM
                  retool.po_initial_parameters
              )

            GROUP BY ALL


),
region_rr as (
select region,product_id,sum(rr_minunit) as region_rr
from tool_rr  
left join whs on tool_rr.warehouse_id = whs.warehouse_id
group by all 
),
prices as (
select           CASE WHEN cpu.cohort_id IN (695, 700) THEN 'Cairo'
                 WHEN cpu.cohort_id IN (701) THEN 'Giza'
                 WHEN cpu.cohort_id IN (698, 704) THEN 'Delta East'
                 WHEN cpu.cohort_id IN (697, 703) THEN 'Delta West'
                 WHEN cpu.cohort_id IN (696, 705) THEN 'Upper Egypt'
                 WHEN cpu.cohort_id IN (699, 702) THEN 'Alexandria'
				ELSE cpu.cohort_id::varchar END as region,

product_id,price,weight,price/nullif((weight/1000),0) as vw
from  COHORT_PRODUCT_PACKING_UNITS  cpu
JOIN  packing_unit_products ON packing_unit_products.id = cpu.PRODUCT_PACKING_UNIT_ID
where cohort_id in (700,701,702,703,704,705)
and price is not null
and is_basic_unit = 1 
),
ops as (
select product_id,region,case when nmv <> 0 then ops_cost/nmv else 0 end as ops
from (
select product_id,c.region,cat.name_ar as cat,b.name_ar as brand, sum(NMV_EX_VAT) as nmv,sum(lm_cost+wh_cost+mm_cost) as ops_cost,
sum(nmv) over(partition by cat.name_ar,b.name_ar) as brand_cat_nmv
from finance.sku_costs c 
join products p on p.id = c.product_id 
join brands b on b.id = p.brand_id 
join CATEGORies cat on cat.id = p.category_id
where month between  date_trunc('month',current_date - interval '3 month') and date_trunc('month',current_date)-1
group by all 
)
)
select s.*,b.name_ar as brand,c.name_ar as cat,f.wac_p as wac_p,prices.price,ops,prices.vw, COALESCE(region_rr,0) as rr,stocks/(case when rr <> 0 then rr else 1 end) as doh 
from stocks s 
join products p on p.id = s.product_id
join brands b on b.id = p.brand_id 
join categories c on c.id = p.category_id
join prices on prices.product_id = s.product_id and prices.region = s.region
join finance.all_cogs f on f.product_id = s.product_id and current_timestamp between f.from_date and f.to_date
left join region_rr r on s.region = r.region and s.product_id = r.product_id
join ops on ops.product_id = s.product_id and
ops.region = case when s.region like '%Delta%' then 'Delta' when s.region in ('Cairo','Giza') Then 'Greater Cairo' else s.region end 

'''
product_stocks = query_snowflake(command_string, columns = ['region','product_id','stocks','brand','cat','wac_p','price','ops','vw','rr','doh'])
product_stocks.product_id = pd.to_numeric(product_stocks.product_id)
product_stocks.stocks = pd.to_numeric(product_stocks.stocks)
product_stocks.rr = pd.to_numeric(product_stocks.rr)
product_stocks.doh = pd.to_numeric(product_stocks.doh)
product_stocks.price = pd.to_numeric(product_stocks.price)
product_stocks.wac_p = pd.to_numeric(product_stocks.wac_p)
product_stocks.vw = pd.to_numeric(product_stocks.vw)
product_stocks.ops = pd.to_numeric(product_stocks.ops)

In [10]:
command_string = '''
with fill_rate as (



WITH
  whs AS (
    SELECT
      *
    FROM
      (
        VALUES
          ('Cairo', 'El-Marg', 38),
          ('Cairo', 'Mostorod', 1),
          ('Giza', 'Barageel', 236),
          ('Giza', 'Basatin', 39),
          ('Delta West', 'El-Mahala', 337),
          ('Delta West', 'Tanta', 8),
          ('Delta East', 'Mansoura FC', 339),
          ('Delta East', 'Sharqya', 170),
          ('Upper Egypt', 'Assiut FC', 501),
          ('Upper Egypt', 'Bani sweif', 401),
          ('Upper Egypt', 'Menya Samalot', 703),
          ('Upper Egypt', 'Sohag', 632),
          ('Alexandria', 'Khorshed Alex', 797)
      ) x (region, wh, warehouse_id)
  ),
  active_skus AS (
    SELECT
      CASE
        WHEN regions.name_en = 'Greater Cairo' THEN cities.name_en
        ELSE regions.name_en
      END AS region,
      pso.product_id AS product_id,
      sum(pso.total_price) AS nmv
    FROM
      sales_orders so
      JOIN product_sales_order pso ON so.id = pso.sales_order_id
      JOIN materialized_views.retailer_polygon ON materialized_views.retailer_polygon.retailer_id = so.retailer_id
      JOIN districts ON districts.id = materialized_views.retailer_polygon.district_id
      JOIN cities ON cities.id = districts.city_id
      JOIN states ON states.id = cities.state_id
      JOIN regions ON regions.id = states.region_id
    WHERE
      so.created_at::DATE >= DATE_TRUNC('month', CURRENT_DATE - INTERVAL '3 month')
      AND sales_order_status_id NOT IN (7, 12)
      AND so.channel IN ('telesales', 'retailer')
      AND pso.PURCHASED_ITEM_COUNT > 0
    GROUP BY
      ALL
    HAVING
      nmv > 0
  ),
  -- cat_target as (
  -- ),
  weighted_fill_rate AS (
    SELECT
      DAY,
      product_id,
      region,
      1 - (oos / total) AS fill_rate
    FROM
      (
        SELECT
          timestamp::DATE AS DAY,
          whs.region,
          categories.name_ar AS cat,
          brands.name_ar AS brand,
          products.id AS product_id,
          nmv,
          count(
            CASE
              WHEN ss.activation = FALSE
              OR ss.available_stock = 0 THEN timestamp
            END
          ) AS oos,
          count(timestamp) AS total
        FROM
          materialized_views.STOCK_SNAP_SHOTS_RECENT ss
          JOIN whs ON whs.warehouse_id = ss.warehouse_id
          JOIN products ON products.id = ss.product_id
          JOIN product_units ON product_units.id = products.unit_id
          JOIN categories ON categories.id = products.category_id
          JOIN brands ON brands.id = products.brand_id
          JOIN active_skus ON active_skus.region = whs.region
          AND active_skus.product_id = ss.product_id
        WHERE
          ss.timestamp::DATE >= DATE_TRUNC('month', CURRENT_DATE - INTERVAL '3 months')
        GROUP BY
          ALL
      )
  ),
  prs_in AS (
    WITH
      prs AS (
        SELECT DISTINCT
          purchased_receipts.date::DATE AS pr_date,
          max(date_part('hour', purchased_receipts.date)) AS HOUR,
          products.id AS product_id,
          CONCAT(
            products.name_ar,
            ' ',
            products.size,
            ' ',
            product_units.name_ar
          ) AS sku,
          brands.name_ar AS Brand,
          categories.name_ar AS category,
          purchased_receipts.warehouse_id AS warehouse_id,
          warehouses.name AS warehouse,
          sum(
            product_purchased_receipts.purchased_item_count * product_purchased_receipts.basic_unit_count
          ) AS purchase_min_count,
        FROM
          product_purchased_receipts
          LEFT JOIN products ON products.id = product_purchased_receipts.product_id
          LEFT JOIN packing_unit_products ON packing_unit_products.product_id = products.id
          LEFT JOIN purchased_receipts ON purchased_receipts.id = product_purchased_receipts.purchased_receipt_id
          LEFT JOIN purchased_receipt_statuses ON purchased_receipt_statuses.id = purchased_receipts.purchased_receipt_status_id
          LEFT JOIN packing_units ON packing_units.id = product_purchased_receipts.packing_unit_id
          LEFT JOIN product_units ON products.unit_id = product_units.id
          LEFT JOIN suppliers ON suppliers.id = purchased_receipts.supplier_id
          LEFT JOIN brands ON brands.id = products.brand_id
          LEFT JOIN categories ON categories.id = products.category_id
          LEFT JOIN warehouses ON warehouses.id = purchased_receipts.warehouse_id
        WHERE
          product_purchased_receipts.purchased_item_count <> 0
          AND purchased_receipts.purchased_receipt_status_id IN (4, 5, 7)
          AND purchased_receipts.date::DATE >= date_trunc('month', CURRENT_DATE - interval '3 months')
          --  AND purchased_receipts.is_actual = 'true'
        GROUP BY
          ALL
      ),
      sales_cntrb AS (
        SELECT
          date_part('hour', so.created_at) AS HOUR,
          pso.warehouse_id AS warehouse_id,
          sum(pso.total_price) AS nmv
        FROM
          sales_orders so
          JOIN product_sales_order pso ON so.id = pso.sales_order_id
        WHERE
          so.created_at::DATE BETWEEN CURRENT_DATE -30 AND CURRENT_DATE  -1
          AND sales_order_status_id NOT IN (7, 12)
          AND pso.purchased_item_count <> 0
          AND channel <> 'admin'
        GROUP BY
          ALL
      ),
      whs AS (
        SELECT
          *
        FROM
          (
            VALUES
              ('Cairo', 'El-Marg', 38),
              ('Cairo', 'Mostorod', 1),
              ('Giza', 'Barageel', 236),
              ('Giza', 'Basatin', 39),
              ('Delta West', 'El-Mahala', 337),
              ('Delta West', 'Tanta', 8),
              ('Delta East', 'Mansoura FC', 339),
              ('Delta East', 'Sharqya', 170),
              ('Upper Egypt', 'Assiut FC', 501),
              ('Upper Egypt', 'Bani sweif', 401),
              ('Upper Egypt', 'Menya Samalot', 703),
              ('Upper Egypt', 'Sohag', 632),
              ('Alexandria', 'Khorshed Alex', 797)
          ) x (region, wh, warehouse_id)
      )
    SELECT
      pr_date,
      product_id,
      sku,
      region,
      round(sum((1 - cntrb) * purchase_min_count), 0) AS in_stocks
    FROM
      (
        SELECT
          x.pr_date,
          x.hour,
          x.product_id,
          x.sku,
          x.warehouse_id,
          x.purchase_min_count,
          x.cntrb,
          region
        FROM
          (
            SELECT
              prs.*,
              sum(
                CASE
                  WHEN sc.hour < prs.hour THEN nmv
                END
              ) / sum(nmv) AS cntrb
            FROM
              prs
              LEFT JOIN sales_cntrb sc ON prs.warehouse_id = sc.warehouse_id
            GROUP BY
              ALL
          ) x
          JOIN whs ON whs.warehouse_id = x.warehouse_id
      )
    GROUP BY
      ALL
  )
        SELECT
          DATE,
          product_id,
          cat,
          brand,
          region,
          day_stocks,
          fill_rate
        FROM
          (
            SELECT
              *,
              coalesce(
                CASE
                  WHEN opening_stocks = 0
                  AND closing_stocks > 0 and fill_rate is not null THEN fill_rate * closing_stocks
                  WHEN closing_stocks > opening_stocks
                  AND in_stocks IS NOT NULL THEN opening_stocks + in_stocks
                  ELSE opening_stocks
                END,
                0
              ) AS day_stocks,
            FROM
              (
                SELECT
                  x.*,
                  coalesce(wfr.fill_rate, 0) AS fill_rate,
                  wac_p,
                  pin.in_stocks
                FROM
                  (
                    SELECT
                      *,
                      lag(closing_stocks) over (
                        PARTITION BY
                          product_id,
                          region
                        ORDER BY
                          DATE
                      ) AS opening_stocks
                    FROM
                      (
                        SELECT
                          sdc.timestamp::DATE AS DATE,
                          sdc.product_id,
                          c.name_ar AS cat,
                          b.name_ar AS brand,
                          whs.region,
                          sum(sdc.AVAILABLE_STOCK) AS closing_stocks
                        FROM
                          MATERIALIZED_VIEWS.STOCK_DAY_CLOSE sdc
                          JOIN whs ON whs.warehouse_id = sdc.warehouse_id
                          JOIN products p ON p.id = sdc.product_id
                          JOIN brands b ON b.id = p.BRAND_ID
                          JOIN categories c ON c.id = p.category_id
                        WHERE
                          timestamp::DATE  BETWEEN date_trunc('month',CURRENT_DATE - interval '3 months')  AND CURRENT_DATE - 1
                        GROUP BY
                          ALL
                      )
                    ORDER BY
                      DATE
                  ) x
                  LEFT JOIN weighted_fill_rate wfr ON wfr.product_id = x.product_id
                  AND x.date = wfr.day
                  AND x.region = wfr.region
                  LEFT JOIN finance.all_cogs f ON f.product_id = x.product_id
                  AND f.from_date::DATE <= x.date
                  AND f.to_date::DATE > x.date
                  LEFT JOIN prs_in pin ON pin.product_id = x.product_id
                  AND pin.region = x.region
                  AND pin.pr_date = x.date
              )
          ) z
        WHERE
          DATE BETWEEN date_trunc('month',CURRENT_DATE - interval '3 months')  AND CURRENT_DATE - 1
),
sales AS (
  select * 
  from (
  select s.*,  day_stocks,
  PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY qty) OVER (partition by s.region,s.product_id ) AS q1,
  PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY qty) OVER (partition by s.region,s.product_id ) AS q3,
  q3+(1.2*(q3-q1)) as max,
	q1-(1.2*(q3-q1)) as min
  from (
    SELECT DISTINCT
	case when regions.id = 2 then cities.name_en else regions.name_en end as region,
      so.created_at::DATE AS date,
      pso.product_id,
      CONCAT(products.name_ar, ' ', products.size, ' ', product_units.name_ar) AS sku,
      brands.name_ar AS brand,
      categories.name_ar AS cat,
      SUM(pso.total_price) AS nmv,
      SUM(pso.purchased_item_count * basic_unit_count) AS qty
	  
    FROM product_sales_order pso
    JOIN sales_orders so ON so.id = pso.sales_order_id
    JOIN products ON products.id = pso.product_id
    JOIN brands ON products.brand_id = brands.id
    JOIN categories ON products.category_id = categories.id
	JOIN    materialized_views.retailer_polygon poly ON poly.retailer_id = so.retailer_id
	JOIN    districts ON districts.id = poly.district_id
	JOIN    cities ON cities.id = districts.city_id
	JOIN    states ON states.id = cities.STATE_ID
	JOIN    regions ON regions.id = states.region_id
    JOIN finance.all_cogs f ON f.product_id = pso.product_id
      AND f.from_date::DATE <= so.created_at::DATE
      AND f.to_date::DATE > pso.created_at::DATE
    JOIN product_units ON product_units.id = products.unit_id
    WHERE so.created_at::DATE BETWEEN date_trunc('month',CURRENT_DATE - interval '3 months')  AND CURRENT_DATE - 1
      AND so.sales_order_status_id NOT IN (7, 12)
      AND so.channel IN ('telesales', 'retailer')
      AND pso.purchased_item_count <> 0
	  AND TO_CHAR(so.created_at::date, 'Day') <> 'Fri'
    GROUP BY ALL
	)s
	join fill_rate fr on fr.product_id = s.product_id and fr.region = s.region and fr.date = s.date
	)
	where qty between min and max 
	and qty <= day_stocks*0.95
  ),

  cohort_prices AS (
    SELECT CASE WHEN cohort_id = 700 THEN 'Cairo'
                             WHEN cohort_id = 701 THEN 'Giza'
                             WHEN cohort_id = 702 THEN 'Alexandria'
                             WHEN cohort_id = 703 THEN 'Delta West'
                             WHEN cohort_id = 704 THEN 'Delta East'
                             WHEN cohort_id = 705 THEN 'Upper Egypt'
                        ELSE cohort_id::varchar END as region,
      pup.product_id,
      cpc.price,
      cpc.created_at AS change_date,
      COALESCE(
        LEAD(cpc.created_at) OVER (PARTITION BY cohort_id, pup.product_id ORDER BY cpc.created_at),
        CURRENT_TIMESTAMP
      ) AS next_change
    FROM cohort_pricing_changes cpc
    LEFT JOIN packing_unit_products pup ON pup.id = cpc.product_packing_unit_id
    WHERE pup.is_basic_unit = 1
      AND cpc.created_at::DATE BETWEEN date_trunc('month',CURRENT_DATE - interval '6 months')  AND CURRENT_DATE - 1
	  and cpc.cohort_id in (700,701,702,703,704,705)
  )
  
  SELECT
    region,
    product_id,
    sku,
    cat,
    brand,
	sum(nmv) as nmv,
    SUM(elasticty * qty_cntrb) AS elas
  FROM (
      SELECT *,
        qty / SUM(qty) OVER (PARTITION BY region, product_id) AS qty_cntrb
      FROM (
        SELECT *,
          CASE
            WHEN price_change <> 0 THEN qty_change / price_change
            ELSE 0
          END AS elasticty
        FROM (
          SELECT *,
            (qty - old_qty) / old_qty AS qty_change,
            (price - old_price) / old_price AS price_change
          FROM (
            SELECT *,
              LAG(qty) OVER (PARTITION BY product_id, region ORDER BY price) AS old_qty,
              LAG(price) OVER (PARTITION BY product_id, region ORDER BY price) AS old_price
            FROM (
  			select region ,product_id,sku,brand,cat,price,avg(nmv) as nmv,avg(qty) as qty
			  from(
              SELECT s.*, cp.price
              FROM sales s
              JOIN cohort_prices cp ON cp.region = s.region
                AND s.product_id = cp.product_id
                AND s.date >= cp.change_date
                AND s.date < cp.next_change
				 
				)
				group by all 
				order by price
            )
            QUALIFY old_qty IS NOT NULL
          )
          WHERE (
            (price_change > 0 AND qty_change < 0) OR
            (price_change < 0 AND qty_change > 0)
          )
        )
      )
    )
  GROUP BY ALL
  order by nmv desc
'''
product_elasticity = query_snowflake(command_string, columns = ['region','product_id','sku','cat','brand','nmv','elas'])
product_elasticity.product_id = pd.to_numeric(product_elasticity.product_id)
product_elasticity.nmv = pd.to_numeric(product_elasticity.nmv)
product_elasticity.elas = pd.to_numeric(product_elasticity.elas)

In [11]:
command_string = '''
WITH whs as (SELECT *
             FROM   (values
                            ('Cairo', 'El-Marg', 38),
                            ('Cairo', 'Mostorod', 1),
                            ('Giza', 'Barageel', 236),
                            ('Giza', 'Basatin', 39),
                            ('Delta West', 'El-Mahala', 337),
                            ('Delta West', 'Tanta', 8),
                            ('Delta East', 'Mansoura FC', 339),
                            ('Delta East', 'Sharqya', 170),
                            ('Upper Egypt', 'Assiut FC', 501),
                            ('Upper Egypt', 'Bani sweif', 401),
                            ('Upper Egypt', 'Menya Samalot', 703),
                            ('Upper Egypt', 'Sohag', 632),
                            ('Alexandria', 'Khorshed Alex', 797))
                    x(region, wh, warehouse_id))


select region, sum(nmv)/sum(weight) as value_to_weight ,STDDEV_POP(nmv/weight) as std
from (
select *,  PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY nmv) 
    OVER (PARTITION BY region) AS percentile_25
from (
SELECT
		date,	
        whs.region as region,
        sum(product_sales_order.total_price) as nmv,
        sum((packing_unit_products.weight*product_sales_order.PURCHASED_ITEM_COUNT)/1000.00) AS weight

    
    
    From
        sales_orders
    
        JOIN product_sales_order ON product_sales_order.sales_order_id = sales_orders.id
        join whs on whs.warehouse_id = product_sales_order.warehouse_id
         JOIN packing_unit_products ON product_sales_order.product_id = packing_unit_products.product_id AND product_sales_order.packing_unit_id = packing_unit_products.packing_unit_id

    
    WHERE sales_orders.CREATED_AT::date>=current_date-30
    AND sales_orders.sales_order_status_id NOT IN (7,12)
    
    GROUP BY all
)
)
where nmv >= percentile_25
GROUP BY all
'''
region_vw = query_snowflake(command_string, columns = ['region','region_vw','vw_std'])
region_vw.region_vw = pd.to_numeric(region_vw.region_vw)
region_vw.vw_std = pd.to_numeric(region_vw.vw_std)

Unnamed: 0,region,region_vw,vw_std
0,Cairo,41.697978,2.500899
1,Alexandria,40.739153,2.471356
2,Delta West,39.868775,2.375862
3,Delta East,39.809716,2.887111
4,Giza,43.915958,1.422001
5,Upper Egypt,32.951381,1.040741


In [12]:
cat_brand_data = cat_brand_perf.merge(day_target,on=['brand','cat','region'])



product_data=product_stocks.merge(Skus_cntrb,on=['product_id','region'])
product_data = product_data.merge(cat_brand_data,on=['brand','cat','region'])
product_data['nmv_ability'] = product_data['stocks']*product_data['price']




cat_brand_ability=product_data.groupby(['region','cat','brand']).agg({
    'todays_target': 'mean',
    'nmv_ability': 'sum'
}).reset_index()
cat_brand_ability['brand_todays_ability'] = np.minimum(cat_brand_ability['nmv_ability']/cat_brand_ability['todays_target'],1)


cat_ability=cat_brand_ability.groupby(['region','cat']).agg({
    'todays_target': 'sum',
    'nmv_ability': 'sum'
}).reset_index()
cat_ability['cat_today_ability'] = np.minimum(cat_ability['nmv_ability']/cat_ability['todays_target'],1)

cat_brand_ability = cat_brand_ability.merge(cat_ability[['region','cat','cat_today_ability']],on=['cat','region'])





cat_brand_ability['total_cat_target'] = cat_brand_ability.groupby(['region','cat'])['todays_target'].transform(sum)
cat_brand_ability['total_cat_ability'] = cat_brand_ability.groupby(['region','cat'])['nmv_ability'].transform(sum)
cat_brand_ability['total_cat_target']=cat_brand_ability['total_cat_target']*cat_brand_ability['cat_today_ability']
cat_brand_ability['ability_cntrb']=cat_brand_ability['nmv_ability']/cat_brand_ability['total_cat_ability']
cat_brand_ability['new_brand_target'] = np.maximum(cat_brand_ability['ability_cntrb'] * cat_brand_ability['total_cat_target'],
                                        np.minimum(cat_brand_ability['nmv_ability'],cat_brand_ability['todays_target']))


product_data = product_data.merge(cat_brand_ability[['region','cat','brand','new_brand_target']],on=['region','cat','brand'])
product_data['final_ach%'] = ((product_data['mtd_ach']* first_part)+(product_data['last_3_days_ach']*second_part))/(first_part+second_part)

product_data = product_data[product_data['new_brand_target']>0]

cond = [product_data['final_ach%']<1,product_data['final_ach%']>=1]
cho = [1-product_data['final_ach%'],np.maximum((product_data['new_brand_target']-product_data['todays_target'])/product_data['todays_target'],0)]
# product_data['final_increase'] = np.select(cond,cho,default=0)
product_data['final_increase'] =(1-product_data['final_ach%'])+np.maximum((product_data['new_brand_target']-product_data['todays_target'])/product_data['todays_target'],0)

product_data = product_data[product_data['nmv_ability']>0]

product_data['old_margin'] = (product_data['price'] - product_data['wac_p'])/product_data['price']
product_data['old_cm3'] = product_data['old_margin']-product_data['ops']
product_data['30_perc_cm3']=(product_data['old_cm3']*0.5)*-1

product_data = product_data.merge(product_elasticity[['region','product_id','elas']],on=['region','product_id'])

product_data['discount'] = np.minimum(np.maximum(product_data['final_increase']/product_data['elas'],-0.02),-0.0035)
product_data['discount'] = np.where(product_data['final_increase'] == 0, 0, product_data['discount'])
product_data['discount'] = np.where(product_data['30_perc_cm3']<product_data['discount'], product_data['30_perc_cm3'], product_data['discount'])
product_data['discount'] = np.minimum(np.maximum(product_data['discount'],-0.02),-0.0035)

product_data['new_price'] = product_data['price']*(1+product_data['discount'])
product_data['new_price']=np.round(product_data['new_price'] * 4) / 4
product_data['new_margin'] = (product_data['new_price'] - product_data['wac_p'])/product_data['new_price']

product_data['new_cm3'] = product_data['new_margin']-product_data['ops']
product_data= product_data.merge(region_vw,on='region')

In [13]:
final_products = product_data[(product_data['vw']>product_data['region_vw']+product_data['vw_std'])
             |
            (product_data['new_cm3']>=0.01)
            ]

## Retailers selction

In [17]:
query = '''
WITH order_data AS (
  SELECT
    so.created_at::Date AS order_date,
    parent_sales_order_id,
    so.retailer_id,
    sales_order_status_id AS order_status,
    SUM(pso.total_price) AS nmv,
    SUM(PURCHASED_ITEM_COUNT * pso.BASIC_UNIT_COUNT * wac_p) AS cogs,
    (LEAD(so.created_at::Date) OVER (
      PARTITION BY so.retailer_id
      ORDER BY so.created_at::Date
    )) - so.created_at::Date AS next_order_diff
  FROM product_sales_order pso
  JOIN sales_orders so ON so.id = pso.sales_order_id 
  JOIN products ON products.id = pso.product_id
  JOIN brands ON products.brand_id = brands.id
  JOIN categories ON products.category_id = categories.id
  JOIN finance.all_cogs f ON f.product_id = pso.product_id
    AND f.from_date::date <= so.created_at::date
    AND f.to_date::date > so.created_at::date
  JOIN product_units ON product_units.id = products.unit_id
  WHERE so.created_at::date BETWEEN CURRENT_DATE - 63 AND CURRENT_DATE
    AND so.sales_order_status_id NOT IN (7, 12)
    AND so.channel IN ('retailer')
    AND pso.PURCHASED_ITEM_COUNT <> 0
  GROUP BY 1, 2, 3, 4
),
agg AS (
  SELECT
    retailer_id,
    COUNT(DISTINCT parent_sales_order_id) AS orders,
    MIN(order_date) AS fo,
    MAX(order_date) AS lo,
    SUM(nmv) AS total_nmv,
    SUM(cogs) AS total_cogs,
    (SUM(nmv) - SUM(cogs)) / SUM(nmv) AS margin,
    AVG(next_order_diff) AS next_order_diff
  FROM order_data
  GROUP BY retailer_id
),
final AS (
  SELECT
    *,
    CASE WHEN lo = fo THEN 0 ELSE orders / (NULLIF(DATEDIFF(day, fo, lo), 0) / 30.0) END AS frequency
  FROM agg
)
SELECT
  final.*,
  LAST_VALUE(order_status) OVER (
    PARTITION BY retailer_id
    ORDER BY order_date
    ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
  ) AS last_order_status
FROM final
JOIN order_data USING (retailer_id)
QUALIFY ROW_NUMBER() OVER (PARTITION BY retailer_id ORDER BY order_date DESC) = 1;

'''
target_retailers =query_snowflake(query, columns = ['RETAILER_ID','ORDERS','FO','LO','TOTAL_NMV','TOTAL_COGS','MARGIN','NEXT_ORDER_DIFF','FREQUENCY','last_order_status'])
target_retailers.columns = target_retailers.columns.str.lower()
cols_to_exclude = ['fo', 'lo']  
target_retailers[target_retailers.columns.difference(cols_to_exclude)] = target_retailers[target_retailers.columns.difference(cols_to_exclude)].apply(pd.to_numeric, errors='coerce')
target_retailers['next_order_diff'] = np.ceil(target_retailers['next_order_diff'])
target_retailers['expetced_next_order'] = (datetime.today().date())+ timedelta(days=1)
target_retailers.loc[target_retailers['frequency'] >= 5,'expetced_next_order'] = target_retailers.loc[target_retailers['frequency'] >= 5,'lo'] + pd.to_timedelta(target_retailers.loc[target_retailers['frequency'] >= 5,'next_order_diff'], unit='D')

In [15]:
target_retailers = target_retailers[(target_retailers['expetced_next_order'] != (datetime.today().date()))&(target_retailers['last_order_status']==6) ]

In [18]:
query = '''
WITH retailers_orders AS (
  SELECT
    *,
    ROW_NUMBER() OVER (
      PARTITION BY retailer_id
      ORDER BY order_date
    ) AS ret_order_id
  FROM (
    SELECT
      *,
      COALESCE(
        LEAD(order_date) OVER (
          PARTITION BY retailer_id
          ORDER BY order_date
        ),
        CURRENT_TIMESTAMP
      ) AS next_order
    FROM (
      SELECT DISTINCT
        so.retailer_id,
        so.created_at AS order_date
      FROM sales_orders so
      WHERE so.created_at::DATE BETWEEN date_trunc('month', CURRENT_DATE - interval '4 months') AND CURRENT_DATE
    )
  )
),

app_rnk AS (
  SELECT
    *,
    ROW_NUMBER() OVER (
      PARTITION BY retailer_id
      ORDER BY new_rnk DESC
    ) AS sku_rnk
  FROM (
    SELECT
      retailer_id,
      product_id,
      SUM(sku_rnk_order * (sku_orders_per_rnk / NULLIF(ret_orders, 0))) AS new_rnk
    FROM (
      SELECT
        retailer_id,
        product_id,
        sku_rnk_order,
        ret_orders,
        COUNT(DISTINCT ret_order_id) AS sku_orders_per_rnk,
        SUM(COUNT(DISTINCT ret_order_id)) OVER (
          PARTITION BY retailer_id, product_id
        ) AS sku_orders
      FROM (
        SELECT
          retailer_id,
          ret_order_id,
          product_id,
          MAX(sku_rnk_order) AS sku_rnk_order,
          COUNT(DISTINCT ret_order_id) OVER (PARTITION BY retailer_id) AS ret_orders
        FROM (
          SELECT
            event_date,
            event_timestamp,
            uc.retailer_id,
            productsid AS product_id,
            ret_order_id,
            DENSE_RANK() OVER (
              PARTITION BY uc.retailer_id, ret_order_id
              ORDER BY event_timestamp DESC
            ) AS sku_rnk_order
          FROM maxab_events.update_cart uc
          JOIN retailers_orders ro ON ro.retailer_id = uc.retailer_id
            AND event_timestamp >= ro.order_date
            AND event_timestamp < ro.next_order
          WHERE date_trunc('day', event_timestamp) BETWEEN date_trunc('month', CURRENT_DATE - interval '4 months') AND CURRENT_DATE
            AND country LIKE '%Egypt%'
            AND update_type = 'add'
            AND user_id LIKE '%EG_retailers_%'
            AND productsid REGEXP '^[0-9]+$'
        )
        GROUP BY 1, 2, 3
      )
      GROUP BY ALL
    )
    GROUP BY ALL
  )
)
select retailer_id,product_id,sku,brand,cat,rnk
from (
SELECT
  *,
  SUM(rnk_norm) OVER (
    PARTITION BY retailer_id
    ORDER BY rnk_norm ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
  ) AS cumm_cntrb
FROM (
  SELECT
    *,
    rnk / NULLIF(SUM(rnk) OVER (PARTITION BY retailer_id), 0) AS rnk_norm
  FROM (
    SELECT
      x.*,
      CASE
        WHEN app_rnk.new_rnk IS NOT NULL THEN
          (0.3 * normalized_orders_cntrb) +
          (0.05 * normalized_diff_date) +
          (0.35 * normalized_nmv_cntrb) +
          (0.3 * COALESCE(app_rnk.new_rnk, 0))
        ELSE
          (0.45 * normalized_orders_cntrb) +
          (0.1 * normalized_diff_date) +
          (0.45 * normalized_nmv_cntrb)
      END AS last_w,
      DENSE_RANK() OVER (
        PARTITION BY x.retailer_id
        ORDER BY last_w DESC
      ) AS rnk
    FROM (
      SELECT
        *,
        (nmv_cntrb - MIN(nmv_cntrb) OVER (PARTITION BY retailer_id)) /
        NULLIF(MAX(nmv_cntrb) OVER (PARTITION BY retailer_id) - MIN(nmv_cntrb) OVER (PARTITION BY retailer_id), 0) AS normalized_nmv_cntrb,
        
        (diff_date - MIN(diff_date) OVER (PARTITION BY retailer_id)) /
        NULLIF(MAX(diff_date) OVER (PARTITION BY retailer_id) - MIN(diff_date) OVER (PARTITION BY retailer_id), 0) AS normalized_diff_date,
        
        (orders_cntrb - MIN(orders_cntrb) OVER (PARTITION BY retailer_id)) /
        NULLIF(MAX(orders_cntrb) OVER (PARTITION BY retailer_id) - MIN(orders_cntrb) OVER (PARTITION BY retailer_id), 0) AS normalized_orders_cntrb
      FROM (
        SELECT
          *,
          sku_orders / NULLIF(retailer_orders, 0) AS orders_cntrb,
          sku_nmv / NULLIF(retailer_nmv, 0) AS nmv_cntrb,
          CASE
            WHEN (retailer_last_order - last_sku_order) <> 0 THEN 1.0 / NULLIF((retailer_last_order - last_sku_order), 0)
            ELSE 1
          END AS diff_date
        FROM (
          SELECT
            retailer_id,
            product_id,
            sku,
            brand,
            cat,
            retailer_orders,
            retailer_nmv,
            retailer_last_order,
            COUNT(DISTINCT parent_sales_order_id) AS sku_orders,
            SUM(total_price) AS sku_nmv,
            MAX(created_at) AS last_sku_order
          FROM (
            SELECT DISTINCT
              so.retailer_id,
              pso.product_id,
              so.parent_sales_order_id,
              pso.total_price,
              so.created_at::DATE AS created_at,
              CONCAT(products.name_ar, ' ', products.size, ' ', product_units.name_ar) AS sku,
              brands.name_ar AS brand,
              categories.name_ar AS cat,
              COUNT(DISTINCT so.parent_sales_order_id) OVER (PARTITION BY retailer_id) AS retailer_orders,
              SUM(pso.total_price) OVER (PARTITION BY retailer_id) AS retailer_nmv,
              MAX(so.created_at::DATE) OVER (PARTITION BY retailer_id) AS retailer_last_order
            FROM product_sales_order pso
            JOIN sales_orders so ON so.id = pso.sales_order_id
            JOIN products ON products.id = pso.product_id
            JOIN brands ON products.brand_id = brands.id
            JOIN categories ON products.category_id = categories.id and categories.name_ar not like '%سايب%'
			join sections on sections.id = categories.section_id and sections.name_ar not like '%مجمدات%'
            JOIN finance.all_cogs f ON f.product_id = pso.product_id
              AND f.from_date::DATE <= so.created_at::DATE
              AND f.to_date::DATE > pso.created_at::DATE
            JOIN product_units ON product_units.id = products.unit_id
            WHERE
              so.created_at::DATE BETWEEN date_trunc('month', CURRENT_DATE - interval '3 months') AND CURRENT_DATE - 1
              AND so.sales_order_status_id NOT IN (7, 12)
              AND so.channel IN ('telesales', 'retailer')
              AND pso.purchased_item_count <> 0
            QUALIFY retailer_nmv <> 0
          )
          GROUP BY ALL
        )
      )
    ) x
    LEFT JOIN app_rnk ON app_rnk.retailer_id = x.retailer_id AND app_rnk.product_id = x.product_id
  )
)
where rnk <=15
)
'''
top_sku_ret = query_snowflake(query, columns = ['retailer_id','product_id','sku','brand','cat','rnk'])
top_sku_ret['retailer_id'] = pd.to_numeric(top_sku_ret['retailer_id'])
top_sku_ret['rnk'] = pd.to_numeric(top_sku_ret['rnk'])

Unnamed: 0,retailer_id,product_id,sku,brand,cat,rnk
0,230568,7005,اندومى لحمة بيف جامبو عرض 44 كيس - 100 جم,اندومي,شعرية سريعة التحضير,1
1,230568,12224,اندومى سوبر مى خضار 5 جنيه - 56 جم,اندومي,شعرية سريعة التحضير,2
2,230568,12225,اندومى سوبر مى لحمة بيف 5 جنيه - 56 جم,اندومي,شعرية سريعة التحضير,3
3,230568,12775,صابون جوى ابيض عرض - 110 جم,جوي,صابون,4
4,230568,11953,اوكسى يدوى نسيم الشرق 15 جنيه - 175 جم,اوكسي,منظفات,5
...,...,...,...,...,...,...
975387,234474,411,لبن جهينة مكس موز - 200 مل,جهينة ألبان,ألبان,1
975388,234474,9784,عصير جهينة بيور تفاح - 235 مل,جهينة عصاير,عصاير,1
975389,234474,206,لبن جهينة مكس فراولة - 200 مل,جهينة ألبان,ألبان,1
975390,234474,205,لبن جهينة مكس شوكولاتة - 200 مل,جهينة ألبان,ألبان,1


In [20]:
query = '''
with carts as (
WITH retailers_orders AS (
      SELECT DISTINCT
        so.retailer_id,
        max(so.created_at) AS order_date
		from sales_orders so
      	WHERE so.created_at::DATE BETWEEN date_trunc('month', CURRENT_DATE - interval '45 days') AND CURRENT_DATE - 3
		group by all
),
last_brand_order as (
select so.retailer_id,brands.id as brand_id,max(so.created_at::date) as last_brand_date 
FROM product_sales_order pso
JOIN sales_orders so ON so.id = pso.sales_order_id
JOIN products on products.id=pso.product_id
JOIN brands on products.brand_id = brands.id
JOIN categories ON products.category_id = categories.id

    where so.created_at::date >=  CURRENT_date - 30 
    AND so.sales_order_status_id not in (7,12)
    AND so.channel IN ('telesales','retailer')
    AND pso.purchased_item_count <> 0
group by all 
),
events as (
select  distinct 
            event_date,
            event_timestamp,
            vb.retailer_id,
			vb.brand_id,
			vb.brand_name
			
		FROM maxab_events.view_brand vb
		join brands b on b.id = vb.brand_id 
		join products p on p.brand_id = b.id 
		JOIN categories ON p.category_id = categories.id and categories.name_ar not like '%سايب%'
		join sections on sections.id = categories.section_id and sections.name_ar not like '%مجمدات%'
          WHERE date_trunc('day', event_timestamp) BETWEEN date_trunc('month', CURRENT_DATE - interval '2 months') AND CURRENT_DATE
            AND country LIKE '%Egypt%'
            AND user_id LIKE '%EG_retailers_%'
			and vb.brand_id <> 'null'
),
cart_update as (
 SELECT
            event_date,
            event_timestamp,
            uc.retailer_id,
            productsid AS product_id,
			b.id as brand_id 
          FROM maxab_events.update_cart uc
		  join products p on p.id = uc.productsid
		  join brands b on b.id = p.brand_id 
		JOIN categories ON p.category_id = categories.id and categories.name_ar not like '%سايب%'
		join sections on sections.id = categories.section_id and sections.name_ar not like '%مجمدات%'
          WHERE date_trunc('day', event_timestamp) BETWEEN date_trunc('month', CURRENT_DATE - interval '2 months') AND CURRENT_DATE
            AND country LIKE '%Egypt%'
            AND update_type = 'add'
            AND user_id LIKE '%EG_retailers_%'
            AND productsid REGEXP '^[0-9]+$'
        )
select *, (date_rnk+cntrb_rnk)/2 as weight,row_number()over(partition by retailer_id order by weight) as final_rnk
from (
select *,
dense_rank()over(partition by retailer_id order by date_diff) as date_rnk,
dense_rank()over(partition by retailer_id order by cntrb) as cntrb_rnk

from (
select x.*, current_date - last_time::date as date_diff,num_times/sum(num_times)over(partition by x.retailer_id) as cntrb
from (
select retailer_id,brand_id,brand_name,count(distinct event_date) as num_times,max(event_timestamp) as last_time
from (
select e.*,ro.order_date
from events e 
join retailers_orders ro  on ro.retailer_id = e.retailer_id and e.event_timestamp >= ro.order_date
left join cart_update cu on cu.retailer_id = e.retailer_id and cu.brand_id = e.brand_id and cu.event_timestamp >= e.event_timestamp
where cu.brand_id is null
)
group by all 
having last_time >= CURRENT_date - 30 
)x 
left join last_brand_order lbo on lbo.brand_id = x.brand_id and lbo.retailer_id = x.retailer_id and lbo.last_brand_date >= last_time + interval '1 hour'
where lbo.last_brand_date is null
 
)
)
qualify final_rnk <= 15 
),
 base_data AS (
  SELECT 
    so.retailer_id,
    pso.product_id,
    brands.name_ar AS brand,
    carts.final_rnk,
    so.parent_sales_order_id,
    pso.total_price,
	COUNT(DISTINCT parent_sales_order_id) OVER (PARTITION BY so.retailer_id, brand) AS all_orders
  FROM product_sales_order pso
  JOIN sales_orders so ON so.id = pso.sales_order_id
  JOIN products ON products.id = pso.product_id
  JOIN brands ON products.brand_id = brands.id
  JOIN categories ON products.category_id = categories.id and categories.name_ar not like '%سايب%'
	join sections on sections.id = categories.section_id and sections.name_ar not like '%مجمدات%'
	
  JOIN carts ON carts.retailer_id = so.retailer_id AND carts.brand_id = brands.id 
  JOIN finance.all_cogs f ON f.product_id = pso.product_id
                          AND f.from_date::date <= so.created_at::date
                          AND f.to_date::date > so.created_at::date
  JOIN product_units ON product_units.id = products.unit_id 
  WHERE 
    so.created_at::date BETWEEN date_trunc('month', CURRENT_DATE - interval '4 months') AND CURRENT_DATE 
    AND so.sales_order_status_id NOT IN (7,12)
    AND so.channel IN ('telesales','retailer')
    AND pso.purchased_item_count <> 0
)
select retailer_id,product_id,brand,final_rnk,rank_all
from (
select *,rank()over(partition by retailer_id,brand order by final_cntrb desc) as rank_all
from (
select *,(0.7*orders_cntrb)+(0.3*nmv_cntrb) as final_cntrb 
from (
select *,orders/all_orders as orders_cntrb , nmv/sum(nmv)over(partition by retailer_id,brand) as nmv_cntrb 
from (
SELECT 
  retailer_id,
  product_id,
  brand,
  final_rnk,
  all_orders,
  COUNT(DISTINCT parent_sales_order_id) AS orders,
  SUM(total_price) AS nmv,
FROM base_data
GROUP BY 
  retailer_id, product_id, brand, final_rnk,all_orders
)
)
)
)
'''
top_10_brands_cart = query_snowflake(query, columns = ['retailer_id','product_id','brand','final_rnk','rank_all'])
top_10_brands_cart['retailer_id'] = pd.to_numeric(top_10_brands_cart['retailer_id'])
top_10_brands_cart['final_rnk'] = pd.to_numeric(top_10_brands_cart['final_rnk'])
top_10_brands_cart['rank_all'] = pd.to_numeric(top_10_brands_cart['rank_all'])

Unnamed: 0,retailer_id,product_id,brand,final_rnk,rank_all
0,67,23,الملكة,5,1
1,67,25,الملكة,5,2
2,67,24,الملكة,5,3
3,67,27,الملكة,5,4
4,67,26,الملكة,5,5
...,...,...,...,...,...
293288,905898,378,شويبس,1,1
293289,906173,11180,اوكسي,4,1
293290,906173,11271,اوكسي,4,2
293291,906173,11778,اوكسي,4,3
