In [1]:
%%capture

# Upgrade pip
!pip install --upgrade pip
# Connectivity
!pip install psycopg2-binary  # PostgreSQL adapter
# !pip install snowflake-connector-python  # Snowflake connector
!pip install snowflake-connector-python==3.15.0 # Snowflake connector Older Version
!pip install snowflake-sqlalchemy  # Snowflake SQLAlchemy connector
!pip install warnings # Warnings management
# !pip install pyarrow # Serialization
!pip install keyring==23.11.0 # Key management
!pip install sqlalchemy==1.4.46 # SQLAlchemy
!pip install requests # HTTP requests
!pip install boto3 # AWS SDK
# !pip install slackclient # Slack API
!pip install oauth2client # Google Sheets API
!pip install gspread==5.9.0 # Google Sheets API
!pip install gspread_dataframe # Google Sheets API
!pip install google.cloud # Google Cloud
# Data manipulation and analysis
!pip install polars
!pip install pandas==2.2.1
!pip install numpy
# !pip install fastparquet
!pip install openpyxl # Excel file handling
!pip install xlsxwriter # Excel file handling
# Linear programming
!pip install pulp
# Date and time handling
!pip install --upgrade datetime
!pip install python-time
!pip install --upgrade pytz
# Progress bar
!pip install tqdm
# Database data types
!pip install db-dtypes
# Geospatial data handling
# !pip install geopandas
# !pip install shapely
# !pip install fiona
# !pip install haversine
# Plotting

# Modeling
!pip install statsmodels
!pip install scikit-learn

!pip install import-ipynb

In [2]:
import pandas as pd
import numpy as np
from tqdm import tqdm
from datetime import datetime
import calendar
import json
from datetime import date, timedelta
from oauth2client.service_account import ServiceAccountCredentials
import importlib
import import_ipynb
import warnings
from datetime import datetime, timedelta
import pytz  
import os
import snowflake.connector
import boto3
warnings.filterwarnings("ignore")
import base64
from botocore.exceptions import ClientError
from requests import get
from pathlib import Path
import requests
from time import sleep  # Delays execution

  from pandas.core.computation.check import NUMEXPR_INSTALLED
  warn_incompatible_dep(


In [None]:
# Class to download from S3 bucket
class S3Downloader:
    """Small helper that copies objects from one S3 bucket to local disk."""

    def __init__(self, bucket_name):
        # Name of the bucket every download_file call reads from.
        self.bucket_name = bucket_name

    def download_file(self, key, destination):
        """Fetch `key` from the bucket and store it at `destination`.

        The parent directory of `destination` is created when missing.
        A "404" ClientError is reported on stdout and swallowed (the method
        then returns None without having written anything); any other
        ClientError and any FileNotFoundError is re-raised.
        """
        resource = boto3.resource('s3')

        # Make sure the folder that will hold the file exists.
        parent_dir = os.path.dirname(destination)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        try:
            bucket = resource.Bucket(self.bucket_name)
            bucket.download_file(key, destination)
        except ClientError as err:
            # Only a missing object is tolerated; everything else propagates.
            if err.response['Error']['Code'] != "404":
                raise
            print(f"Error: {key} does not exist in S3.")
        except FileNotFoundError as err:
            print(f"FileNotFoundError: {destination}. Ensure the path is correct.")
            raise err
        else:
            print(f"Downloaded: {key} -> {destination}")

# Bootstrap step: fetch the shared environment helper from S3 and run it.
# Downloader bound to the bucket that stores the shared notebook assets.
downloader = S3Downloader('io.maxab.sagemaker1')

# Download setup_environment_2.py into the current working directory so the
# `import` below can find it. S3Downloader only prints a message when the key
# is missing (404), in which case the import fails or picks up a stale copy.
dbdp_s3_path = 'automated-notebooks/amrmaali/DBDP_Revamped'
downloader.download_file(f'{dbdp_s3_path}/setup_environment_2.py', 'setup_environment_2.py')

# Import the helper; the reload makes a re-run of this cell pick up the file
# that was just downloaded instead of the module cached by the kernel.
import setup_environment_2
importlib.reload(setup_environment_2)
# NOTE(review): presumably this populates the SNOWFLAKE_* environment variables
# read by the query helpers in this notebook -- TODO confirm in setup_environment_2.
setup_environment_2.initialize_env()

In [None]:
def query_snowflake(query, columns=None):
    """Run a read query on Snowflake and return the result as a DataFrame.

    Connection settings come from the SNOWFLAKE_USERNAME, SNOWFLAKE_ACCOUNT,
    SNOWFLAKE_PASSWORD and SNOWFLAKE_DATABASE environment variables; the
    query runs on the COMPUTE_WH warehouse.

    :param query: SQL text to execute.
    :param columns: optional column labels for the result. When omitted (or
        empty) the frame keeps pandas' default integer labels.
    :return: DataFrame built from ``np.array(rows)`` (so mixed-type rows are
        coerced by numpy and callers convert with ``pd.to_numeric``); an
        empty DataFrame carrying ``columns`` when the query returns no rows;
        ``None`` when the query fails (the error is printed, not raised).
    """
    import os
    import snowflake.connector
    import numpy as np
    import pandas as pd

    has_columns = columns is not None and len(columns) > 0

    con = snowflake.connector.connect(
        user=os.environ["SNOWFLAKE_USERNAME"],
        account=os.environ["SNOWFLAKE_ACCOUNT"],
        password=os.environ["SNOWFLAKE_PASSWORD"],
        database=os.environ["SNOWFLAKE_DATABASE"],
    )
    # Bound before the try so the finally clause never touches an unset name
    # when con.cursor() itself fails.
    cur = None
    try:
        cur = con.cursor()
        cur.execute("USE WAREHOUSE COMPUTE_WH")
        cur.execute(query)
        rows = cur.fetchall()
        if not has_columns:
            return pd.DataFrame(np.array(rows))
        if len(rows) == 0:
            # np.array([]) is one-dimensional and cannot be labelled with
            # several columns, so build the empty frame directly.
            return pd.DataFrame(columns=columns)
        return pd.DataFrame(np.array(rows), columns=columns)
    except Exception as e:
        # Best-effort contract kept from the original: report and return None.
        print("Error: ", e)
    finally:
        if cur is not None:
            cur.close()
        con.close()

In [None]:
def get_secret(secret_name):
    """Fetch a secret value from AWS Secrets Manager (region us-east-1).

    :param secret_name: id or name of the secret, passed as ``SecretId``.
    :return: the ``SecretString`` when the response has one, otherwise the
        base64-decoded ``SecretBinary`` payload.
    :raises ClientError: every error returned by ``GetSecretValue`` is
        propagated unchanged. The previous if/elif chain only re-raised five
        known error codes and silently returned ``None`` for any other code.
    """
    region_name = "us-east-1"

    # Create a Secrets Manager client
    session = boto3.session.Session()
    client = session.client(
        service_name='secretsmanager',
        region_name=region_name
    )

    # No try/except on purpose: a failed lookup must surface to the caller
    # instead of turning into a None secret further down the notebook.
    get_secret_value_response = client.get_secret_value(SecretId=secret_name)

    # Depending on whether the secret is a string or binary, one of these
    # fields will be populated.
    if 'SecretString' in get_secret_value_response:
        return get_secret_value_response['SecretString']
    return base64.b64decode(get_secret_value_response['SecretBinary'])

In [None]:
# Load the pricing API credentials from AWS Secrets Manager; the secret value
# is parsed as a JSON document.
pricing_api_secret = json.loads(get_secret("prod/pricing/api/"))
# Module-level credentials: `username` and `password` are read as globals by
# get_access_token; `secret` is passed as the client secret by post_QD and
# post_cart_rules.
username = pricing_api_secret["egypt_username"]
password = pricing_api_secret["egypt_password"]
secret = pricing_api_secret["egypt_secret"]

In [None]:
def get_access_token(url, client_id, client_secret, timeout=30):
    """
    Request a session token for the MaxAB APIs using the password grant.

    The user credentials are NOT parameters: they are read from the
    module-level ``username`` and ``password`` variables, which must be
    defined before this function is called.

    :param url: production MaxAB token URL
    :param client_id: client ID (sent as HTTP basic-auth user)
    :param client_secret: client secret (sent as HTTP basic-auth password)
    :param timeout: seconds to wait for the token endpoint before giving up;
        without it ``requests`` can block forever on a stalled connection
    :return: the ``access_token`` string from the JSON response
    :raises requests.HTTPError: when the endpoint answers with a 4xx/5xx
        status (previously this surfaced later as a KeyError)
    """
    response = requests.post(
        url,
        data={"grant_type": "password",
              "username": username,
              "password": password},
        auth=(client_id, client_secret),
        timeout=timeout,
    )
    # Fail loudly on rejected credentials instead of on a missing JSON key.
    response.raise_for_status()
    return response.json()["access_token"]

In [None]:
def post_QD(file_name):
    """Upload a quantity-discount workbook to the MaxAB commerce API.

    A fresh access token is requested for every call (client
    'main-system-externals', client secret taken from the module-level
    ``secret``).

    :param file_name: path of the .xlsx file; it is also sent as the
        upload's file name in the multipart field ``file``.
    :return: the ``requests.Response`` of the POST; the status is not
        checked here, callers inspect it themselves.
    """
    token = get_access_token('https://sso.maxab.info/auth/realms/maxab/protocol/openid-connect/token',
                             'main-system-externals',
                             secret)
    url = "https://api.maxab.info/commerce/api/admins/v1/quantity-discounts"
    payload = {}
    headers = {
      'Authorization': 'bearer {}'.format(token)}

    # The context manager closes the workbook once the request has been sent;
    # the previous bare open() leaked one file handle per call.
    with open(file_name, 'rb') as sheet:
        files = [
          ('file', (file_name, sheet, 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'))
        ]
        response = requests.request("POST", url, headers=headers, data=payload, files=files)
    return response

In [None]:
def writer_snowflake_query(query):
    """Execute a write statement (DDL/DML) on Snowflake with the service user.

    Authenticates with the key file /tmp/sagemaker_service.p8 and the
    SNOWFLAKE_SERVICE_USERNAME, SNOWFLAKE_ACCOUNT, SNOWFLAKE_DATABASE and
    SNOWFLAKE_ROLE environment variables; the statement runs on COMPUTE_WH.

    :param query: SQL statement to execute.
    :return: 1 on success, ``None`` when execution fails (the error is
        printed, not raised).
    """
    import os
    import snowflake.connector

    config = {
        'user': os.environ["SNOWFLAKE_SERVICE_USERNAME"],
        'account': os.environ["SNOWFLAKE_ACCOUNT"],
        'private_key_file': '/tmp/sagemaker_service.p8',
        'database': os.environ["SNOWFLAKE_DATABASE"],
        'role': os.environ["SNOWFLAKE_ROLE"],
        'schema': 'PUBLIC'
        }

    conn = snowflake.connector.connect(**config)

    # Bound before the try so a failing conn.cursor() cannot make the finally
    # clause raise UnboundLocalError and skip conn.close().
    cur = None
    try:
        cur = conn.cursor()
        cur.execute("USE WAREHOUSE COMPUTE_WH")
        cur.execute(query)
        return 1
    except Exception as e:
        print("Error: ", e)
    finally:
        if cur is not None:
            cur.close()
        conn.close()

def pandas_dtype_to_snowflake(dtype):
    """Map a pandas/numpy dtype to a Snowflake SQL column type.

    :param dtype: dtype object as found on ``Series.dtype``.
    :return: the type name followed by a comma ("NUMBER,", "FLOAT,",
        "BOOLEAN,", "TIMESTAMP," or "TEXT,"). The trailing comma is part of
        the contract: dataframe_to_snowflake_columns relies on it when it
        assembles the column list.
    """
    dtype_checks = pd.api.types
    if dtype_checks.is_integer_dtype(dtype):
        return "NUMBER,"
    if dtype_checks.is_float_dtype(dtype):
        return "FLOAT,"
    if dtype_checks.is_bool_dtype(dtype):
        return "BOOLEAN,"
    if dtype_checks.is_datetime64_any_dtype(dtype):
        return "TIMESTAMP,"
    # Object, categorical and every other dtype are stored as text. The
    # explicit is_categorical_dtype() check was dropped: it is deprecated in
    # pandas >= 2.1 and returned the same value as this fallback anyway.
    return "TEXT,"

def dataframe_to_snowflake_columns(df, table_name):
    """Build a ``CREATE TABLE IF NOT EXISTS`` statement matching a DataFrame.

    Column types come from pandas_dtype_to_snowflake, with two overrides:
    columns named ``group`` / ``section`` (any case) are double-quoted, and
    columns named ``start_date`` / ``end_date`` (any case) are always typed
    TIMESTAMP regardless of their dtype.

    :param df: DataFrame whose columns define the table; names must be strings.
    :param table_name: table name inserted verbatim into the statement.
    :return: the DDL statement, one column definition per line.
    :raises ValueError: when ``df`` has no columns; the previous
        comma-stripping logic removed the opening parenthesis in that case
        and produced invalid SQL.
    """
    if len(df.columns) == 0:
        raise ValueError(
            f"Cannot build a CREATE TABLE statement for {table_name}: the DataFrame has no columns"
        )

    definitions = []
    for col in df.columns:
        # The mapper returns e.g. "NUMBER,"; separators are added by the join below.
        snowflake_type = pandas_dtype_to_snowflake(df[col].dtype).rstrip(',')
        if col.lower() in ['group', 'section']:
            col = f'"{col}"'
        if col.lower() in ['start_date', 'end_date']:
            snowflake_type = 'TIMESTAMP'
        definitions.append(f'{col} {snowflake_type}')

    header = f'CREATE TABLE IF NOT EXISTS {table_name} ('
    return header + "\n" + ",\n".join(definitions) + ');'

# create tables if not exist
def table_exist_test(df, table_name):
    """Ensure `table_name` exists in Snowflake with columns derived from `df`.

    Generates the CREATE TABLE IF NOT EXISTS statement for the frame and runs
    it through the write connection. Nothing is returned.
    """
    writer_snowflake_query(dataframe_to_snowflake_columns(df, table_name))

# Snowflake DB query to write into tables
def eg_snowflake_writer(df, table, schema):
    """Append a DataFrame to a Snowflake table with ``write_pandas``.

    Authenticates like writer_snowflake_query (service user, key file
    /tmp/sagemaker_service.p8, SNOWFLAKE_* environment variables).

    :param df: DataFrame to upload.
    :param table: target table name, forwarded as ``table_name``.
    :param schema: target schema, forwarded as ``schema``.
    :return: the ``success`` flag reported by ``write_pandas``.
    """
    import os
    import snowflake.connector
    from snowflake.connector.pandas_tools import write_pandas

    config = {
        'user': os.environ["SNOWFLAKE_SERVICE_USERNAME"],
        'account': os.environ["SNOWFLAKE_ACCOUNT"],
        'private_key_file': '/tmp/sagemaker_service.p8',
        'database': os.environ["SNOWFLAKE_DATABASE"],
        'role': os.environ["SNOWFLAKE_ROLE"],
        'schema': 'PUBLIC'
    }
    conn = snowflake.connector.connect(**config)
    try:
        success, _, _, _ = write_pandas(conn=conn, df=df, table_name=table, schema=schema)
    finally:
        # Release the session even when the upload fails; the connection was
        # previously left open on every call.
        conn.close()
    return success

def DatabaseDump(df, path, erase='False'):
    """Replace a slice of a Snowflake table with the rows of ``df``.

    First deletes the rows that the new data supersedes, then appends ``df``.

    :param df: DataFrame to push. NOTE: its column labels are upper-cased in
        place before the upload, so the caller's frame is modified.
    :param path: ``"schema.table"``; the first part is used as schema and the
        second as table name (both upper-cased) for the upload.
    :param erase: which rows to delete first (compared case-insensitively):
        ``'False'`` (default) removes rows matching the first ``start_date``
        value and the ``cohort_id`` values of ``df``; ``'month'`` removes the
        current month based on ``created_at`` and/or ``date``; any other
        value removes every row.
    :return: None.

    WARNING: when the columns required by the selected mode are missing, no
    filter is added and the DELETE clears the whole table.
    """
    # Accept real booleans as well as the documented string flags.
    mode = str(erase).lower()

    # initialize the standard command
    command_string = f"DELETE FROM {path} WHERE True"
    print_string = f"Succesfuly Removed & Re-Added Data \n for {path}"

    # Case-insensitive lookup of the actual column labels, so detection and
    # access agree (the labels are upper-cased by a previous call on the
    # same frame).
    actual_name = {str(name).lower(): name for name in df.columns}

    # if the flag is 'false', erase latest push (day / time_slot)
    if mode == 'false':
        if "start_date" in actual_name:
            # Read from the frame being pushed, not from an unrelated global.
            date_value = df[actual_name["start_date"]].values[0]
            print(date_value)
            command_string += f" AND start_date = TO_TIMESTAMP('{date_value}')"
            print(command_string)
            print_string += f"\ndate = {date_value}"

        if "cohort_id" in actual_name:
            # Render the list explicitly: tuple() would emit "(700,)" for a
            # single id and numpy scalar reprs on newer numpy versions.
            cohort_ids = df[actual_name["cohort_id"]].unique().tolist()
            id_list = "(" + ", ".join(repr(value) for value in cohort_ids) + ")"
            command_string += f" AND cohort_id IN {id_list}"
            print_string += f"\nCohort IDs IN {id_list}"

    # if the flag is 'month', erase current month
    if mode == 'month':
        if "created_at" in actual_name:
            command_string += " AND DATE_TRUNC('month', created_at) = DATE_TRUNC('month', SYSDATE())"
            print_string += "\ndate = Current month"
        if "date" in actual_name:
            command_string += " AND DATE_TRUNC('month', date) = DATE_TRUNC('month', SYSDATE())"
            print_string += "\ndate = Current month"

    # Remove data of the same day, time_slot, etc...
    writer_snowflake_query(command_string)

    # Push the new data to the table
    df.columns = df.columns.str.upper()
    path_parts = path.split('.')
    eg_snowflake_writer(df, path_parts[1].upper(), path_parts[0].upper())
    print(print_string)


In [None]:
def post_cart_rules(id_, file_name):
    """Upload a cart-rules workbook for one cohort to the MaxAB admin API.

    A fresh access token is requested for every call (client
    'main-system-externals', client secret taken from the module-level
    ``secret``).

    :param id_: cohort id, inserted into the endpoint URL.
    :param file_name: path of the .xlsx file; it is also sent as the
        upload's file name in the multipart field ``sheet``.
    :return: the ``requests.Response`` of the POST; the status is not
        checked here, callers inspect it themselves.
    """
    token = get_access_token('https://sso.maxab.info/auth/realms/maxab/protocol/openid-connect/token',
                             'main-system-externals',
                             secret)
    url = "https://api.maxab.info/main-system/api/admin-portal/cohorts/{}/cart-rules".format(id_)
    payload = {}
    headers = {
      'Authorization': 'bearer {}'.format(token)}

    # The context manager closes the workbook once the request has been sent;
    # the previous bare open() leaked one file handle per call.
    with open(file_name, 'rb') as sheet:
        files = [
          ('sheet', (file_name, sheet, 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'))
        ]
        response = requests.request("POST", url, headers=headers, data=payload, files=files)
    return response

In [None]:
# Read the list of brands that must always receive quantity discounts from the
# 'QD_brands' Google Sheet and turn it into an optional SQL predicate.
# gspread is used below but is not imported by the notebook's import cell,
# so it is imported here to keep a fresh-kernel run working.
import gspread

scope = ["https://spreadsheets.google.com/feeds",
         'https://www.googleapis.com/auth/spreadsheets',
         "https://www.googleapis.com/auth/drive.file",
         "https://www.googleapis.com/auth/drive"]
# Service-account key is stored as JSON in the "prod/maxab-sheets" secret.
creds = ServiceAccountCredentials.from_json_keyfile_dict(json.loads(setup_environment_2.get_secret("prod/maxab-sheets")), scope)
client = gspread.authorize(creds)
force_brands = client.open('QD_brands').worksheet('Sheet1')
force_brands_df = pd.DataFrame(force_brands.get_all_records())
if force_brands_df.empty:
    # Keep the expected schema so later code can still reference `.brand`.
    force_brands_df = pd.DataFrame(columns=['brand'])
    brand_filter = ""
else:
    # Render every brand as a single-quoted SQL string literal. repr() is not
    # SQL quoting: it switches to double quotes (an identifier in Snowflake)
    # for names containing an apostrophe and leaves numeric cells unquoted.
    brand_literals = [
        "'" + str(b).replace("\\", "\\\\").replace("'", "''") + "'"
        for b in force_brands_df.brand.unique()
    ]
    # Interpolated into the selected_skus CTE as an extra OR condition.
    brand_filter = f"OR brand IN ({','.join(brand_literals)})"

In [None]:
if datetime.now(pytz.timezone('Africa/Cairo')).hour >= 13:
    query = '''
    select cppu.cohort_id,product_id,packing_unit_id,COALESCE(cppu.MAX_PER_SALES_ORDER,cppu2.MAX_PER_SALES_ORDER) as current_cart_rule
    from COHORT_PRODUCT_PACKING_UNITS cppu 
    join PACKING_UNIT_PRODUCTS pup on cppu.PRODUCT_PACKING_UNIT_ID = pup.id 
    join cohorts c on c.id = cppu.cohort_id
    join COHORT_PRODUCT_PACKING_UNITS cppu2 on cppu.PRODUCT_PACKING_UNIT_ID = cppu2.PRODUCT_PACKING_UNIT_ID and cppu2.cohort_id = c.FALLBACK_COHORT_ID 
    where cppu.cohort_id in (700,701,702,703,704,1123,1124,1125,1126)

    '''
    live_cart_rules = query_snowflake(query, columns = ['cohort_id','product_id','packing_unit_id','current_cart_rule']) 
    live_cart_rules.cohort_id=pd.to_numeric(live_cart_rules.cohort_id)
    live_cart_rules.product_id=pd.to_numeric(live_cart_rules.product_id)
    live_cart_rules.packing_unit_id=pd.to_numeric(live_cart_rules.packing_unit_id)
    live_cart_rules.current_cart_rule=pd.to_numeric(live_cart_rules.current_cart_rule)
    
    command_string =  f'''
with rr as (

select product_id,warehouse_id,rr
from (
select * ,max(date)over(partition by product_id,warehouse_id) as max_date
from finance.PREDICTED_RUNNING_RATES
qualify date = max_date
and date::date >= current_date - 14 
)

),
stocks as (
WITH whs as (SELECT *
             FROM   (values
                            ('Cairo', 'El-Marg', 38,700),
                            ('Cairo', 'Mostorod', 1,700),
                            ('Giza', 'Barageel', 236,701),
                            ('Delta West', 'El-Mahala', 337,703),
                            ('Delta West', 'Tanta', 8,703),
                            ('Delta East', 'Mansoura FC', 339,704),
                            ('Delta East', 'Sharqya', 170,704),
                            ('Upper Egypt', 'Assiut FC', 501,1124),
                            ('Upper Egypt', 'Bani sweif', 401,1126),
                            ('Upper Egypt', 'Menya Samalot', 703,1123),
                            ('Upper Egypt', 'Sohag', 632,1125),
                            ('Alexandria', 'Khorshed Alex', 797,702),
							('Giza', 'Sakkarah', 962,701)
							
							)
                    x(region, wh, warehouse_id,cohort_id))
select cohort_id,product_id,sum(stocks) as stocks ,case when sum(rr) > 0 then SUM(stocks)/sum(rr) else SUM(stocks) end  as doh
from (
		SELECT DISTINCT whs.region,
				cohort_id,	
                whs.wh,
                product_warehouse.product_id,
                (product_warehouse.available_stock)::integer as stocks,
				coalesce(rr.rr,0) as rr 
        from whs
        JOIN product_warehouse ON product_warehouse.warehouse_id = whs.warehouse_id
        JOIN products on product_warehouse.product_id = products.id
        JOIN product_units ON products.unit_id = product_units.id
		left join rr on rr.product_id= products.id and rr.warehouse_id = whs.warehouse_id

        where   product_warehouse.warehouse_id not in (6,9,10)
            AND product_warehouse.is_basic_unit = 1
			and product_warehouse.available_stock > 0 

)
group by all
HAVING doh > 1 
),
base as (
select *, row_number()over(partition by retailer_id order by priority) as rnk 
from (
select x.*,TAGGABLE_ID as retailer_id 
from (
select id as cohort_id,name as cohort_name,priority,dynamic_tag_id 
from cohorts 
where is_active = 'true'
and id in (700,701,702,703,704,1123,1124,1125,1126)
) x 
join DYNAMIC_TAGgables dt on x.dynamic_tag_id = dt.dynamic_tag_id
)
qualify rnk = 1 
order by cohort_id
),
selected_skus as (
select *
from (
select cohort_id,cohort_name,product_id,cat,brand,row_number()over(partition by cohort_id,cat order by cntrb) as num_skus
from (
select *,min(case when cumulative_sum > 0.4 then cumulative_sum end) over(partition by cat, cohort_id) as thres
from (
select *,SUM(cntrb) OVER (partition by cat, cohort_id ORDER BY cntrb desc ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS cumulative_sum
from (
select *, num_order/sum(num_order)over(partition by cat,cohort_id) as cntrb
from (
SELECT  DISTINCT
		base.cohort_id,
		base.cohort_name,
		pso.product_id,
		CONCAT(products.name_ar,' ',products.size,' ',product_units.name_ar) as sku,
		brands.name_ar as brand, 
		categories.name_ar as cat,
		count(distinct parent_sales_order_id) as num_order ,
        sum(pso.total_price) as nmv,
       sum(COALESCE(f.wac_p,0) * pso.purchased_item_count * pso.basic_unit_count) as cogs,
	   (nmv-cogs)/nmv as bm 
		

FROM product_sales_order pso
JOIN sales_orders so ON so.id = pso.sales_order_id
join base on base.retailer_id = so.retailer_id
JOIN products on products.id=pso.product_id
JOIN brands on products.brand_id = brands.id
JOIN categories ON products.category_id = categories.id and categories.name_ar not like '%سايب%'
JOIN finance.all_cogs f  ON f.product_id = pso.product_id
                        AND f.from_date::date <= so.created_at::date
                        AND f.to_date::date > so.created_at::date
JOIN product_units ON product_units.id = products.unit_id 
join stocks s on s.product_id = pso.product_id and s.cohort_id = base.cohort_id

WHERE   True
    AND so.created_at::date between date_trunc('month',current_date - interval '2 months') and CURRENT_date-1
    AND so.sales_order_status_id not in (7,12)
    AND so.channel IN ('telesales','retailer')
    AND pso.purchased_item_count <> 0

GROUP BY ALL
)
)
)
)
where cumulative_sum <= thres 
{brand_filter}
)
where  num_skus <= 10 
{brand_filter}
),

main as (
select * ,max(rets) over(partition by cohort_id) as max_rets 
from(
select *,count(distinct retailer_id) over(partition by region,cohort_id) as rets 
from (
SELECT  DISTINCT
		so.created_at::date as date,
		parent_sales_order_id,
		so.retailer_id,
		base.cohort_id,
		base.cohort_name,
		case when regions.id = 2 then states.name_en else regions.name_en end as region,
		pso.product_id,
		CONCAT(products.name_ar,' ',products.size,' ',product_units.name_ar) as sku,
		packing_unit_id,
		brands.name_ar as brand, 
		categories.name_ar as cat,
		sum(pso.purchased_item_count) as qty,
        sum(pso.total_price) as nmv,
       sum(COALESCE(f.wac_p,0) * pso.purchased_item_count * pso.basic_unit_count) as cogs,
	   (nmv-cogs)/nmv as bm 
		

FROM product_sales_order pso
JOIN sales_orders so ON so.id = pso.sales_order_id
join base on base.retailer_id = so.retailer_id
JOIN products on products.id=pso.product_id
JOIN brands on products.brand_id = brands.id
JOIN categories ON products.category_id = categories.id
JOIN finance.all_cogs f  ON f.product_id = pso.product_id
                        AND f.from_date::date <= so.created_at::date
                        AND f.to_date::date > so.created_at::date
JOIN product_units ON product_units.id = products.unit_id 
JOIN materialized_views.retailer_polygon on materialized_views.retailer_polygon.retailer_id=so.retailer_id
JOIN districts on districts.id=materialized_views.retailer_polygon.district_id
JOIN cities on cities.id=districts.city_id
join states on states.id=cities.state_id
join regions on regions.id=states.region_id
join selected_skus ss on ss.product_id = pso.product_id and ss.cohort_id = base.cohort_id

WHERE   True
    AND so.created_at::date between date_trunc('month',current_date - interval '2 months') and CURRENT_date-1
    AND so.sales_order_status_id not in (7,12)
    AND so.channel IN ('telesales','retailer')
    AND pso.purchased_item_count <> 0

GROUP BY ALL
)
)
qualify rets = max_rets
),
cohort_data as (
select region,cohort_id,cohort_name,product_id,sku,brand,cat,packing_unit_id,
PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY qty) AS region_q1,
MEDIAN(qty) as region_median,
PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY qty) AS region_q3,
PERCENTILE_CONT(0.85) WITHIN GROUP (ORDER BY qty) AS region_85,
STDDEV_POP(qty) as std
from main
group by all 
),
recent_cohort_data as (
select cohort_id,cohort_name,product_id,sku,brand,cat,packing_unit_id,
MEDIAN(qty) as recent_region_median,
PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY qty) AS recent_region_q3,
STDDEV_POP(qty) as recent_std
from main
where date between current_date - 8 and current_date - 1 
group by all 
),
 freq_table AS (
  SELECT
  	 cohort_id,cohort_name,
    PRODUCT_ID,sku,brand,cat,
	packing_unit_id,
    qty,
    COUNT(distinct parent_sales_order_id) AS freq
  FROM main
  GROUP BY all
),
lag_lead AS (
  SELECT
   cohort_id,cohort_name,
    PRODUCT_ID,sku,brand,cat,
	packing_unit_id,
    qty,
    freq,
    LAG(freq) OVER (PARTITION BY cohort_id,PRODUCT_ID,packing_unit_id ORDER BY qty) AS prev_freq,
    LEAD(freq) OVER (PARTITION BY cohort_id,PRODUCT_ID,packing_unit_id ORDER BY qty) AS next_freq
  FROM freq_table
),
most_freq as (
select * 
from (
select *,max(cntrb)over(partition by product_id,packing_unit_id,cohort_id) as max_cntrb
from (
SELECT *, freq/sum(freq) over(partition by product_id,packing_unit_id,cohort_id) as cntrb
FROM lag_lead ll 
WHERE (freq > COALESCE(prev_freq, -1))
  AND (freq > COALESCE(next_freq, -1))
  )
  )
  where cntrb >= max_cntrb- 0.05
  order by product_id
),
most_qty as (
select cohort_id,cohort_name,product_id,sku,cat,brand,packing_unit_id,ceil(sum(freq_cntrb*qty)) as final_qty 
from (
select *,freq/sum(freq)over(partition by  product_id,packing_unit_id,cohort_id) as freq_cntrb
from most_freq 
)
group by all 
),
final_data as (
select *,
ceil(
least(
GREATEST(
      recent_region_median + 0.75 * recent_std,
      final_qty,
	  region_median+0.75*std,
	  region_median+2,
	  2
    ),
	GREATEST(region_median+2,region_median*1.5)
	)
	
	) as tier_1,
 ceil(
 least(
 GREATEST(
      final_qty + 1 * std,
      region_q3 + 1 * std,
	  region_85 + 0.5 * std,
      recent_region_q3 + 1 * recent_std,
	  tier_1*1.4
    ),
	tier_1*3
	)
	) as tier_2
from (
select  rd.region,mq.*,region_q1,
region_median,
region_q3,
region_85,
std,
COALESCE(recent_region_median,0) as recent_region_median,
COALESCE(recent_region_q3,0) as recent_region_q3,
COALESCE(recent_std,0) as recent_std
from cohort_data rd 
join most_qty mq on rd.cohort_id =mq.cohort_id
and rd.product_id =  mq.product_id
and rd.packing_unit_id = mq.packing_unit_id 
left join recent_cohort_data rrd on rrd.cohort_id =mq.cohort_id
and rrd.product_id =  mq.product_id
and rrd.packing_unit_id = mq.packing_unit_id 
)
),
local_prices as (
SELECT  case when cpu.cohort_id in (700,695) then 'Cairo'
             when cpu.cohort_id in (701) then 'Giza'
             when cpu.cohort_id in (704,698) then 'Delta East'
             when cpu.cohort_id in (703,697) then 'Delta West'
             when cpu.cohort_id in (696,1123,1124,1125,1126) then 'Upper Egypt'
             when cpu.cohort_id in (702,699) then 'Alexandria'
        end as region,
		cohort_id,
        pu.product_id,
		pu.packing_unit_id as packing_unit_id,
		pu.basic_unit_count,
        avg(cpu.price) as price
FROM    cohort_product_packing_units cpu
join    PACKING_UNIT_PRODUCTS pu on pu.id = cpu.product_packing_unit_id
WHERE   cpu.cohort_id in (700,701,702,703,704,696,695,698,697,699,1123,1124,1125,1126)
    and cpu.created_at::date<>'2023-07-31'
    and cpu.is_customized = true
	group by all 
),
live_prices as (
select region,cohort_id,product_id,pu_id as packing_unit_id,buc as basic_unit_count,NEW_PRICE as price
from materialized_views.DBDP_PRICES
where created_at = current_date
and DATE_PART('hour',CURRENT_TIME) BETWEEN SPLIT_PART(time_slot, '-', 1)::int AND SPLIT_PART(time_slot, '-', 2)::int
and cohort_id in (700,701,702,703,704,696,695,698,697,699,1123,1124,1125,1126)
),
prices as (
select *
from (
    SELECT *, 1 AS priority FROM live_prices
    UNION ALL
    SELECT *, 2 AS priority FROM local_prices
)
QUALIFY ROW_NUMBER() OVER (PARTITION BY region,cohort_id,product_id,packing_unit_id ORDER BY priority) = 1
),
region_prices as (
select region,product_id,packing_unit_id,basic_unit_count,avg(price) as region_price
from prices 
where price is not null 
group by all

),
finalized as (
select fd.*,COALESCE(p.basic_unit_count,rp.basic_unit_count) as basic_unit_count,COALESCE(p.price,rp.region_price) as price
from final_data fd 
left join prices p on fd.cohort_id = p.cohort_id and p.product_id = fd.product_id and p.packing_unit_id = fd.packing_unit_id
left join region_prices rp on case when fd.region = 'Giza' then 'Cairo' else fd.region end = rp.region and rp.product_id = fd.product_id and rp.packing_unit_id = fd.packing_unit_id
),
cntrbs as (
select main.product_id,main.cohort_id,main.packing_unit_id,
count(distinct case when qty < tier_1 then retailer_id end ) as ret_below_t1,
count(distinct case when qty >= tier_1  and qty <tier_2 then retailer_id end ) as ret_t1,
count(distinct case when qty >= tier_2  then retailer_id end )as ret_t2 
from main
join finalized f on main.product_id = f.product_id and main.cohort_id = f.cohort_id and f.packing_unit_id = main.packing_unit_id 
group by all 
)
select f.region,f.cohort_id,f.cohort_name,f.product_id,f.sku,f.cat,f.brand,f.packing_unit_id,TIER_1,tier_2,price, c.ret_below_t1,ret_t1,ret_t2,wac_p*BASIC_UNIT_COUNT as wac,f.region_median,f.RECENT_REGION_MEDIAN
from finalized f
join cntrbs c on c.product_id = f.product_id and c.cohort_id = f.cohort_id and f.packing_unit_id = c.packing_unit_id
join finance.all_cogs cogs on cogs.product_id = f.product_id and CURRENT_TIMESTAMP between cogs.from_date and cogs.to_date
where price is not null
'''
    # Run the quantity-discount query. Result columns are labelled positionally, so the
    # expression aliased `wac` in the SQL (wac_p * basic_unit_count) is named WAC_P here.
    qd_result_columns = ['REGION','COHORT_ID','COHORT_NAME','PRODUCT_ID','SKU','CAT','BRAND','PACKING_UNIT_ID','TIER_1','TIER_2','PRICE','RET_BELOW_T1','RET_T1','RET_T2','WAC_P','region_median','RECENT_REGION_MEDIAN']
    quantity_disc_data = query_snowflake(command_string, columns = qd_result_columns)
    quantity_disc_data.columns = quantity_disc_data.columns.str.lower()

    # Coerce every measure/id column used in later arithmetic or joins to a numeric dtype.
    for numeric_col in (
        'product_id', 'packing_unit_id',
        'tier_1', 'tier_2',
        'price', 'wac_p', 'ret_below_t1',
        'ret_t1', 'ret_t2',
        'region_median', 'recent_region_median',
    ):
        quantity_disc_data[numeric_col] = pd.to_numeric(quantity_disc_data[numeric_col])

    # Drop the excluded categories and brand in a single pass.
    in_excluded_cat = quantity_disc_data['cat'].isin(['كروت شحن','مياه معدنيه','مقرمشات','شيبسي'])
    in_excluded_brand = quantity_disc_data['brand'].isin(['فيوري'])
    quantity_disc_data = quantity_disc_data[~(in_excluded_cat | in_excluded_brand)]

    # Blended margin: share of the price left after the (packing-unit level) cost.
    quantity_disc_data['bm'] = (quantity_disc_data['price']-quantity_disc_data['wac_p']) / quantity_disc_data['price']



    # Sales per (cohort, product, packing unit): sum of pso.total_price over orders
    # created from the first day of the month two months back through yesterday.
    # `base` assigns each retailer to exactly one of the listed active cohorts: the one
    # whose `priority` value sorts first (row_number ... order by priority, rnk = 1).
    # Orders with status 7 or 12 are excluded (meaning of those ids is not shown here —
    # presumably cancelled/rejected; verify), as are categories whose Arabic name
    # matches '%سايب%' and lines with zero purchased items.
    query = '''

    with base as (
    select *, row_number()over(partition by retailer_id order by priority) as rnk 
    from (
    select x.*,TAGGABLE_ID as retailer_id 
    from (
    select id as cohort_id,name as cohort_name,priority,dynamic_tag_id 
    from cohorts 
    where is_active = 'true'
    and id in (700,701,702,703,704,1123,1124,1125,1126)
    ) x 
    join DYNAMIC_TAGgables dt on x.dynamic_tag_id = dt.dynamic_tag_id
    )
    qualify rnk = 1 
    order by cohort_id
    )

    SELECT  DISTINCT
            base.cohort_id,
            pso.product_id,
            pso.packing_unit_id,
            sum(pso.total_price) as nmv

    FROM product_sales_order pso
    JOIN sales_orders so ON so.id = pso.sales_order_id
    JOIN products on products.id=pso.product_id
    JOIN brands on products.brand_id = brands.id
    JOIN categories ON products.category_id = categories.id and categories.name_ar not like '%سايب%'
    JOIN product_units ON product_units.id = products.unit_id 
    JOIN materialized_views.retailer_polygon on materialized_views.retailer_polygon.retailer_id=so.retailer_id
    JOIN districts on districts.id=materialized_views.retailer_polygon.district_id
    JOIN cities on cities.id=districts.city_id
    join states on states.id=cities.state_id
    join regions on regions.id=states.region_id
    join base on base.retailer_id = so.retailer_id

    WHERE   so.created_at ::date between date_trunc('month',current_date - interval '2 months') and current_date -1 
        AND so.sales_order_status_id not in (7,12)
        AND so.channel IN ('telesales','retailer')
        AND pso.purchased_item_count <> 0

    GROUP BY ALL
    '''
    # The `columns` list labels the four selected expressions, so `nmv` becomes `total_sales`.
    sales  = query_snowflake(query, columns = ['cohort_id','product_id','packing_unit_id','total_sales'])
    # Make the join keys and the measure numeric before they are merged into main_df.
    sales.product_id = pd.to_numeric(sales.product_id)
    sales.cohort_id = pd.to_numeric(sales.cohort_id)
    sales.packing_unit_id = pd.to_numeric(sales.packing_unit_id)
    sales.total_sales = pd.to_numeric(sales.total_sales)



    # Daily retailer reach per (region, cohort, product, packing unit) over the previous
    # 30 days (current_date-30 .. current_date-1):
    #   1. innermost SELECT: one row per order line grouping, with each retailer mapped
    #      to a single cohort through `base` (first row by `priority`);
    #   2. count distinct retailers per date/region/cohort/product/packing unit;
    #   3. per date/product/cohort/packing unit keep only the top row(s) by retailer
    #      count (dense_rank ... desc, rnk = 1);
    #   4. compute q1/median/q3/std/iqr of the daily counts per product/cohort/packing unit;
    #   5. keep days inside [q1-1.2*iqr, q3+1.2*iqr] AND inside [median, median+std];
    #   6. average the surviving daily counts and take their standard deviation.
    query = '''
    with base as (
    select *, row_number()over(partition by retailer_id order by priority) as rnk 
    from (
    select x.*,TAGGABLE_ID as retailer_id 
    from (
    select id as cohort_id,name as cohort_name,priority,dynamic_tag_id 
    from cohorts 
    where is_active = 'true'
    and id in (700,701,702,703,704,1123,1124,1125,1126)
    ) x 
    join DYNAMIC_TAGgables dt on x.dynamic_tag_id = dt.dynamic_tag_id
    )
    qualify rnk = 1 
    order by cohort_id
    )
    select region,cohort_id,product_id,packing_unit_id,
    avg(num_retailers) as daily_avg_retailers,
    STDDEV(num_retailers)  as std 
    from (
    select * 
    from (
    select *, 
    PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY num_retailers) over(partition by product_id,cohort_id,packing_unit_id)AS q1,
    PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY num_retailers) over(partition by product_id,cohort_id,packing_unit_id)AS median,
    PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY num_retailers) over(partition by product_id,cohort_id,packing_unit_id)AS q3,
    STDDEV(num_retailers) over(partition by product_id,cohort_id,packing_unit_id) as std,
    q3-q1 as iqr
    from (
    select * , dense_rank() over(partition by date,product_id,cohort_id,packing_unit_id order by num_retailers desc ) as rnk 
    from (
    select date,region,cohort_id,product_id,packing_unit_id,count(distinct retailer_id) as num_retailers
    from (
    SELECT  DISTINCT
            so.created_at::date as date,
            parent_sales_order_id,
            so.retailer_id,
            base.cohort_id,
            base.cohort_name,
            case when regions.id = 2 then states.name_en else regions.name_en end as region,
            pso.product_id,
            CONCAT(products.name_ar,' ',products.size,' ',product_units.name_ar) as sku,
            packing_unit_id,
            brands.name_ar as brand, 
            categories.name_ar as cat,
            sum(pso.purchased_item_count) as qty,
            sum(pso.total_price) as nmv,
           sum(COALESCE(f.wac_p,0) * pso.purchased_item_count * pso.basic_unit_count) as cogs,
           (nmv-cogs)/nmv as bm 


    FROM product_sales_order pso
    JOIN sales_orders so ON so.id = pso.sales_order_id
    join base on base.retailer_id = so.retailer_id
    JOIN products on products.id=pso.product_id
    JOIN brands on products.brand_id = brands.id
    JOIN categories ON products.category_id = categories.id
    JOIN finance.all_cogs f  ON f.product_id = pso.product_id
                            AND f.from_date::date <= so.created_at::date
                            AND f.to_date::date > so.created_at::date
    JOIN product_units ON product_units.id = products.unit_id 
    JOIN materialized_views.retailer_polygon on materialized_views.retailer_polygon.retailer_id=so.retailer_id
    JOIN districts on districts.id=materialized_views.retailer_polygon.district_id
    JOIN cities on cities.id=districts.city_id
    join states on states.id=cities.state_id
    join regions on regions.id=states.region_id

    WHERE   True
        AND so.created_at::date between current_date -30 and  CURRENT_date-1
        AND so.sales_order_status_id not in (7,12)
        AND so.channel IN ('telesales','retailer')
        AND pso.purchased_item_count <> 0
    GROUP BY ALL
    )
    group by all 
    )
    qualify rnk = 1 
    )
    )
    WHERE 
    num_retailers >= q1-(1.2*iqr)
    and num_retailers <= q3+(1.2*iqr)
    and num_retailers between median and median+std
    )
    group by all
    order by daily_avg_retailers desc
    '''
    # The SQL alias `std` is labelled `daily_std` by the positional `columns` list.
    avg_daily  = query_snowflake(query, columns = ['region','cohort_id','product_id','packing_unit_id','daily_avg_retailers','daily_std'])
    # Numeric join keys and measures, matching the dtypes used on main_df.
    avg_daily.product_id = pd.to_numeric(avg_daily.product_id)
    avg_daily.cohort_id = pd.to_numeric(avg_daily.cohort_id)
    avg_daily.packing_unit_id = pd.to_numeric(avg_daily.packing_unit_id)
    avg_daily.daily_avg_retailers = pd.to_numeric(avg_daily.daily_avg_retailers)
    avg_daily['daily_std'] = pd.to_numeric(avg_daily['daily_std'])



    # Target margin per (cat, brand) from performance.commercial_targets.
    # The QUALIFY keeps rows of the current month when the table's latest DATE falls in
    # the current month; otherwise it keeps rows of the previous month.
    query = '''
    SELECT DISTINCT cat, brand, margin as target_bm
    FROM    performance.commercial_targets cplan
    QUALIFY CASE WHEN DATE_TRUNC('month', MAX(DATE)OVER()) = DATE_TRUNC('month', CURRENT_DATE) THEN DATE_TRUNC('month', CURRENT_DATE)
    ELSE DATE_TRUNC('month', CURRENT_DATE - INTERVAL '1 month') END = DATE_TRUNC('month', date)
    '''
    # The SQL alias `target_bm` is labelled `target_margin` by the positional `columns` list.
    target_margin = query_snowflake(query, columns = ['cat','brand','target_margin']) 
    target_margin.target_margin=pd.to_numeric(target_margin.target_margin)



    #changed
    # Keep only rows with a known cost and a strictly positive margin.
    cost_known = quantity_disc_data['wac_p'].notna()
    margin_positive = quantity_disc_data['bm'] > 0
    quantity_disc_data = quantity_disc_data[cost_known & margin_positive]

    # Margin floor: 85% of the (cat, brand) target margin, or 2% when no target exists.
    quantity_disc_data = quantity_disc_data.merge(target_margin, on=['cat','brand'], how='left')
    quantity_disc_data['min'] = (quantity_disc_data['target_margin'] * 0.85).fillna(0.02)

    # One category is exempt from the floor and only needs a positive margin.
    floor_exempt = quantity_disc_data['cat'] == 'حاجه ساقعه'
    above_floor = quantity_disc_data['bm'] >= quantity_disc_data['min']
    still_positive = quantity_disc_data['bm'] > 0
    quantity_disc_data = quantity_disc_data[(above_floor & ~floor_exempt) | (still_positive & floor_exempt)]

   #changed
    main_df = quantity_disc_data.copy()

    # Current split of retailers: below tier 1, in [tier 1, tier 2), and at tier 2 or above.
    current_rets = main_df['ret_below_t1']+main_df['ret_t1']+main_df['ret_t2']
    main_df['t0_perc'] = main_df['ret_below_t1']/current_rets
    main_df['t1_perc'] = main_df['ret_t1']/current_rets
    main_df['t2_perc'] = main_df['ret_t2']/current_rets

    main_df['current_median'] = main_df['region_median']

    # Scenario: 25% of the below-tier-1 retailers move up, split 40/60 between tier 1 and tier 2.
    main_df['t0_to_others'] = (0.25*main_df['ret_below_t1']).round()
    main_df['t0_to_t1'] = (0.4*main_df['t0_to_others']).round()
    main_df['t0_to_t2'] = (0.6*main_df['t0_to_others']).round()

    main_df['t0_new_rets'] = main_df['ret_below_t1']-main_df['t0_to_others']
    main_df['t1_new_rets'] = main_df['ret_t1']+main_df['t0_to_t1']
    main_df['t2_new_rets'] = main_df['ret_t2']+main_df['t0_to_t2']

    # Retailer split after the scenario is applied.
    projected_rets = main_df['t0_new_rets']+main_df['t1_new_rets']+main_df['t2_new_rets']
    main_df['t0_new_perc'] = main_df['t0_new_rets']/projected_rets
    main_df['t1_new_perc'] = main_df['t1_new_rets']/projected_rets
    main_df['t2_new_perc'] = main_df['t2_new_rets']/projected_rets

    # Projected quantity: share-weighted mix of the region median and the two tier quantities.
    main_df['new_median'] = (main_df['t0_new_perc']*main_df['region_median'])+(main_df['t1_new_perc']*main_df['tier_1'] )+(main_df['t2_new_perc']*main_df['tier_2'])

    main_df['t1_nmv'] = main_df['price']*projected_rets*main_df['tier_1']*main_df['t1_new_perc']
    main_df['t2_nmv'] = main_df['price']*projected_rets*main_df['tier_2']*main_df['t2_new_perc']
    tiered_nmv = main_df['t1_nmv']+main_df['t2_nmv']

    main_df['median_diff'] = main_df['new_median']-main_df['current_median']
    main_df['OA_increase'] = main_df['median_diff']*main_df['price']*projected_rets*main_df['bm']
    main_df['OA_burn'] = main_df['OA_increase']/tiered_nmv

    # Discount budget: the smallest of 2% of tiered NMV, 40% of the projected increase,
    # and a quarter of the margin earned on tiered NMV.
    main_df['burn_2'] = 0.02*tiered_nmv
    main_df['burn_40'] = 0.4*main_df['OA_increase']
    main_df['Burn_perc_margin'] = (0.25*main_df['bm'])*tiered_nmv
    smaller_of_first_two = np.minimum(main_df['burn_2'],main_df['burn_40'])
    main_df['Burn_use'] = np.minimum(smaller_of_first_two,main_df['Burn_perc_margin'])

    main_df['t1_nmv_cntrb'] = main_df['t1_nmv']/tiered_nmv
    main_df['t2_nmv_cntrb'] = main_df['t2_nmv']/tiered_nmv

    # Split the budget between the tiers and express each part as a fraction of that tier's NMV.
    main_df['Tiers_diff'] = (main_df['tier_2'] - main_df['tier_1'] )/ main_df['tier_1']
    t1_budget = (main_df['Burn_use']/(1+main_df['Tiers_diff']))*main_df['t1_nmv_cntrb']
    main_df['Discount_t1'] = t1_budget/main_df['t1_nmv']
    main_df['Discount_t2'] = (main_df['Burn_use'] - t1_budget)/main_df['t2_nmv']

    # Keep rows with defined, positive discounts, positive margin and non-empty bands.
    usable_rows = (
        main_df['Discount_t1'].notna() & main_df['Discount_t2'].notna()
        & (main_df['t0_new_rets']>0) & (main_df['t1_new_rets']>0) & (main_df['t2_new_rets']>0)
        & (main_df['bm']>0)
        & (main_df['Discount_t1']>0) & (main_df['Discount_t2']>0)
    )
    main_df = main_df[usable_rows]


    # Attach sales and daily-reach figures (inner joins), then order each cohort by sales.
    main_df = (
        main_df
        .merge(sales, on=['cohort_id','product_id','packing_unit_id'])
        .merge(avg_daily, on=['region','cohort_id','product_id','packing_unit_id'])
        .sort_values(['cohort_id', 'total_sales'], ascending=[True, False])
    )
    # 1-based rank inside each cohort; keep the first 100 rows of the targeted cohorts.
    main_df['row_number'] = main_df.groupby('cohort_id').cumcount() + 1
    within_top_100 = main_df['row_number'] <= 100
    in_target_cohort = main_df['cohort_id'].isin([700,701,702,703,704,1123])
    main_df = main_df[within_top_100 & in_target_cohort]


    # Build the upload rows: one per main_df row, grouped by region in order of first
    # appearance. Each discount group is [product_id, packing_unit_id, tier quantity,
    # discount in percent rounded to 2 decimals].
    # Rows are collected in a list and the frame is built once; growing a DataFrame with
    # pd.concat inside the loop re-copies the whole frame on every iteration.
    qd_upload_columns = ['region','Discounts Group 1','Discounts Group 2','Description']
    qd_upload_rows = []
    for reg in main_df.region.unique():
        region_data = main_df[main_df['region']== reg]
        for _, r in region_data.iterrows():
            product_id = r['product_id']
            packing_unit_id = r['packing_unit_id']
            q_1 = int(r['tier_1'])
            q_2 = int(r['tier_2'])
            d_1 = round(r['Discount_t1']*100,2)
            d_2 = round(r['Discount_t2']*100,2)
            qd_upload_rows.append({
                'region': r['region'],
                'Discounts Group 1': [product_id, packing_unit_id, q_1, d_1],
                'Discounts Group 2': [product_id, packing_unit_id, q_2, d_2],
                'Description': f'{reg}QD',
            })
    # `columns=` keeps the four expected columns even when no row qualified.
    final_quantity_discount = pd.DataFrame(qd_upload_rows, columns=qd_upload_columns)


    # Static lookup from region name to the tag id used in the upload ('Tag ID' column);
    # it is inner-joined on 'region' further down.
    Tag_def = {
        'region': ['Cairo', 'Giza', 'Alexandria', 'Upper Egypt', 'Delta East', 'Delta West'],
        'Tag ID': [2807, 2808, 2809, 2810, 2811, 2812]
    }

    Tag_map = pd.DataFrame(Tag_def)
    # Time slots as "start_hour-end_hour" in Africa/Cairo time. Both bounds are
    # inclusive: the pricing SQL matches slots with BETWEEN, and end_date below runs to
    # :59 of the end hour. The previous `current_hour < end` test left hours 12, 17 and
    # 23 without a slot, which fell back to [0, 0] and produced an end time (00:59)
    # earlier than the start time.
    slots = ['0-12','13-17','18-23']
    local_tz = pytz.timezone('Africa/Cairo')
    # Read the clock once so hour, minute and date all come from the same instant.
    today = datetime.now(local_tz)
    current_hour = today.hour
    chosen_slot = [0, 0]  # fallback when no slot matches (cannot happen with the slots above)

    for slot in slots:
        slot_start, slot_end = (int(part) for part in slot.split("-"))
        if slot_start <= current_hour <= slot_end:
            chosen_slot = [slot_start, slot_end]
            break

    start_hour = max(current_hour, chosen_slot[0])
    if start_hour == current_hour:
        # Already inside the slot: start 10 minutes from now.
        start_mins = today.minute + 10
    else:
        start_mins = 30

    # NOTE(review): in the last ten minutes of a slot the start (now + 10 min) lands after
    # end_date (slot end hour :59) — confirm how the upload endpoint treats that window.
    start_date = (today.replace(hour=start_hour, minute=0, second=0, microsecond=0)+ timedelta(minutes=start_mins)).strftime('%d/%m/%Y %H:%M')
    end_date = (today.replace(hour=chosen_slot[1], minute=59, second=0, microsecond=0)).strftime('%d/%m/%Y %H:%M')
    # Attach the tag id and the validity window to the upload frame.
    final_quantity_discount = (
        final_quantity_discount
        .merge(Tag_map, on='region')
        .assign(**{'Start Date/Time': start_date, 'End Date/Time': end_date})
    )
    # Same window and tag id on the analytical frame (window first, then the tag join).
    main_df = (
        main_df
        .assign(start_date=start_date, end_date=end_date)
        .merge(Tag_map, on='region')
    )
    
    # Region -> cohort lookup; 'Upper Egypt' fans out to four cohorts.
    cohort_def = {
        'region': ['Cairo', 'Giza', 'Alexandria', 'Delta East', 'Delta West','Upper Egypt','Upper Egypt','Upper Egypt','Upper Egypt'],
        'cohort_id': [700, 701, 702, 704, 703, 1123,1124,1125,1126]
    }
    region_cohort_map = pd.DataFrame(cohort_def)

    # Pair every tier-2 quantity with the live cart rule of the same cohort/product/packing unit.
    cart_rules_data = (
        main_df[['region','product_id','packing_unit_id','tier_2']]
        .merge(region_cohort_map, on='region')
        .merge(live_cart_rules, on=['cohort_id','product_id','packing_unit_id'])
    )
    # Only rows whose tier 2 exceeds the current rule need the rule raised.
    exceeds_current_rule = cart_rules_data['tier_2'] > cart_rules_data['current_cart_rule']
    cart_rules_data = cart_rules_data.loc[exceeds_current_rule, ['cohort_id','product_id','packing_unit_id','tier_2']]
    
    # Collapse the upload rows to one row per (Tag ID, Description, start, end); each
    # discount-group column becomes a list of the per-product
    # [product_id, packing_unit_id, quantity, discount %] lists.
    final_data = final_quantity_discount.groupby(['Tag ID','Description', 'Start Date/Time', 'End Date/Time'], as_index=False).agg({
        'Discounts Group 1': list ,
        'Discounts Group 2' : list
    })
    
    # For every cohort with rows in cart_rules_data: write its tier-2 quantities to
    # CartRules_<cohort>.xlsx and hand the file to post_cart_rules.
    # NOTE(review): `sleep` is not among the imports visible at the top of the notebook —
    # presumably imported elsewhere; verify.
    for cohort in cart_rules_data.cohort_id.unique():
        req_data = cart_rules_data[cart_rules_data['cohort_id']==cohort]
        if len(req_data) > 0 :
            req_data = req_data[['product_id','packing_unit_id','tier_2']]
            # Column headers written to the spreadsheet.
            req_data.columns = ['Product ID','Packing Unit ID','Cart Rules']
            req_data.to_excel(f'CartRules_{cohort}.xlsx', index=False, engine='xlsxwriter')
            # Pause 5 seconds before each upload.
            sleep(5)
            x =  post_cart_rules(cohort,f'CartRules_{cohort}.xlsx')
            if x.ok:
                print(f"success_{cohort}")
            else:
                print(f"ERROR_{cohort}")
                print(x.content)
                # First failure stops the remaining cohort uploads; execution still
                # continues with the statements after this loop.
                break
            
    # Write the aggregated discounts to QD_upload.xlsx and pass the file to post_QD.
    final_data.to_excel('QD_upload.xlsx', index=False)
    response = post_QD('QD_upload.xlsx')
    if response.ok:
        # Re-format the window from dd/mm/YYYY HH:MM to YYYY-mm-dd HH:MM before persisting.
        main_df.start_date = pd.to_datetime(main_df.start_date, format="%d/%m/%Y %H:%M").dt.strftime('%Y-%m-%d %H:%M')
        main_df.end_date = pd.to_datetime(main_df.end_date, format="%d/%m/%Y %H:%M").dt.strftime('%Y-%m-%d %H:%M')
        main_df['cohort_id'] = pd.to_numeric(main_df['cohort_id'])
        # NOTE(review): table_exist_test receives the frame while it still has the
        # Burn_perc_margin column, but DatabaseDump receives it after that column is
        # dropped — confirm both helpers expect these respective schemas.
        table_exist_test(main_df.rename(columns={'Tag ID': 'tag_id'}), "materialized_views.qd_targets")
        main_df = main_df.drop(columns=["Burn_perc_margin"])
        DatabaseDump(main_df.rename(columns={'Tag ID': 'tag_id'}).reset_index(drop=True), "materialized_views.qd_targets")

    else:
        # Upload rejected: nothing is persisted, only the HTTP status is reported.
        print("Failed with status:", response.status_code)