In [None]:
%%capture

# Upgrade pip
!pip install --upgrade pip
# Connectivity
!pip install psycopg2-binary  # PostgreSQL adapter
# !pip install snowflake-connector-python  # Snowflake connector
!pip install snowflake-connector-python==3.15.0 # Snowflake connector Older Version
!pip install snowflake-sqlalchemy  # Snowflake SQLAlchemy connector
!pip install warnings # Warnings management
# !pip install pyarrow # Serialization
!pip install keyring==23.11.0 # Key management
!pip install sqlalchemy==1.4.46 # SQLAlchemy
!pip install requests # HTTP requests
!pip install boto3 # AWS SDK
# !pip install slackclient # Slack API
!pip install oauth2client # Google Sheets API
!pip install gspread==5.9.0 # Google Sheets API
!pip install gspread_dataframe # Google Sheets API
!pip install google.cloud # Google Cloud
# Data manipulation and analysis
!pip install polars
!pip install pandas==2.2.1
!pip install numpy
# !pip install fastparquet
!pip install openpyxl # Excel file handling
!pip install xlsxwriter # Excel file handling
# Linear programming
!pip install pulp
# Date and time handling
!pip install --upgrade datetime
!pip install python-time
!pip install --upgrade pytz
# Progress bar
!pip install tqdm
# Database data types
!pip install db-dtypes
# Geospatial data handling
# !pip install geopandas
# !pip install shapely
# !pip install fiona
# !pip install haversine
# Plotting

# Modeling
!pip install statsmodels
!pip install scikit-learn

!pip install import-ipynb

In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
from datetime import datetime
import calendar
import json
from datetime import date, timedelta
from oauth2client.service_account import ServiceAccountCredentials
import setup_environment_2
import importlib
import import_ipynb
import warnings
import time
import boto3
import requests
import json
import os
import base64
warnings.filterwarnings("ignore")
importlib.reload(setup_environment_2)
setup_environment_2.initialize_env()
import gspread

/home/ec2-user/.Renviron
/home/ec2-user/service_account_key.json


In [2]:
def query_snowflake(query, columns=None):
    """Run a SQL statement on Snowflake and return the fetched rows as a DataFrame.

    Connection settings come from the SNOWFLAKE_USERNAME, SNOWFLAKE_ACCOUNT,
    SNOWFLAKE_PASSWORD and SNOWFLAKE_DATABASE environment variables, and the
    statement is executed after switching to the COMPUTE_WH warehouse.

    :param query: SQL text to execute.
    :param columns: optional sequence of column labels for the result. When it
        is omitted or empty the frame keeps pandas' default integer labels.
    :return: DataFrame holding every fetched row. The rows are passed through
        ``np.array`` first, so callers should not rely on per-column dtypes.
        Returns ``None`` when executing or fetching fails; the error is printed
        rather than raised.
    """
    import os
    import snowflake.connector
    import numpy as np
    import pandas as pd
    con = snowflake.connector.connect(
        user=os.environ["SNOWFLAKE_USERNAME"],
        account=os.environ["SNOWFLAKE_ACCOUNT"],
        password=os.environ["SNOWFLAKE_PASSWORD"],
        database=os.environ["SNOWFLAKE_DATABASE"]
    )
    # Bound before the try so the finally clause is safe even when
    # con.cursor() itself raises.
    cur = None
    try:
        cur = con.cursor()
        cur.execute("USE WAREHOUSE COMPUTE_WH")
        cur.execute(query)
        rows = cur.fetchall()
        has_labels = columns is not None and len(columns) > 0
        if has_labels and len(rows) == 0:
            # np.array([]) is one-dimensional, so it cannot be combined with
            # several column labels; build the empty, labelled frame directly.
            return pd.DataFrame(columns=list(columns))
        if has_labels:
            return pd.DataFrame(np.array(rows), columns=columns)
        return pd.DataFrame(np.array(rows))
    except Exception as e:
        # Best-effort contract kept from the original: report and return None.
        print("Error: ", e)
    finally:
        if cur is not None:
            cur.close()
        con.close()

In [3]:
# Ask Snowflake for its TIMEZONE parameter and keep the reported setting.
query = '''
SHOW PARAMETERS LIKE 'TIMEZONE'
'''
x = query_snowflake(query)
# The result has default integer column labels; label 1 of the first row is
# used as the timezone (presumably the parameter's value column - confirm).
zone_to_use = x[1].iloc[0]

In [4]:
# Build (retailer_id, tag_id) pairs from two sources:
#   1. a fixed list of retailers, matched by region to the whole_sale tags;
#   2. retailers already attached to a whole_sale tag (tag id > 3000).
query = '''
with new_rets as (
select r.id as retailer_id,case when regions.name_en like '%Delta%' then 'Delta' else regions.name_en end as region 
from retailers r 
JOIN materialized_views.retailer_polygon on materialized_views.retailer_polygon.retailer_id=r.id
JOIN districts on districts.id=materialized_views.retailer_polygon.district_id
JOIN cities on cities.id=districts.city_id
join states on states.id=cities.state_id
join regions on regions.id=states.region_id
where r.id in (164812,454300,948856,949828,13026,489945,951661,101228,408392,943844,24891,456499,573350,90837,564005,294183,188186,134581,581875,816040,732075,339270,406087)
),
tags as (
select dta.id as tag_id,dta.name, 
case when name like '%Delta%' then 'Delta'
when name like '%Alex%' then 'Alexandria'
when name like '%sohag%' then 'Upper Egypt'
when name like '%cairo%' then 'Greater Cairo'
else '' end as region
from dynamic_tags dta
where dta.name like '%whole_sale%'
and dta.id > 3000
)

select * 
from (
select nr.retailer_id ,tag_id 
from new_rets nr 
join tags t on t.region = nr.region 

union all 

select dt.taggable_id as retailer_id, dta.id as tag_id 
from dynamic_taggables dt 
join dynamic_tags dta  on dta.id = dt.dynamic_tag_id
where dta.name like '%whole_sale%'
and dta.id > 3000
)
order by 1 


'''
tag_rets  = query_snowflake(query, columns = ['retailer_id','tag_id'])
tag_rets.columns = tag_rets.columns.str.lower()
for col in tag_rets.columns:
    # Convert a column to numeric only when every value parses; otherwise
    # leave it untouched. This is the explicit form of the deprecated
    # pd.to_numeric(..., errors='ignore').
    try:
        tag_rets[col] = pd.to_numeric(tag_rets[col])
    except (ValueError, TypeError):
        pass

In [None]:
# Recommend retailers for the per-region tags listed in the `mapping` CTE:
#   - `base`: retailers of the listed cohorts, minus those already attached to
#     dynamic tags 3038/3151/3153/3154;
#   - per product, a retailer's quantity is scored against the 80th percentile
#     of its tag (score kept between 40 and 100) and weighted by the product's
#     share of the retailer's NMV;
#   - retailers whose weighted score exceeds 80 are mapped, via their cohort,
#     to the target tag id.
query = '''
with base as (
select c.id as cohort_id,dt.dynamic_tag_id,taggable_id as retailer_id
from cohorts c 
join  dynamic_taggables dt on dt.dynamic_tag_id = c.dynamic_tag_id
where c.id in (700,701,702,703,704,1124,1125,1126,1123)
and dt.taggable_id not in (select taggable_id from dynamic_taggables where dynamic_tag_id in (3038,3151,3153,3154))
),
mapping as (
select * 
from (
values
('Cairo',700,2807),
('Giza',701,2808),
('Alexandria',702,2809),
('Delta East',704,2811),
('Delta West',703,2812),
('Upper Egypt',1123,2810),
('Upper Egypt',1124,2810),
('Upper Egypt',1125,2810),
('Upper Egypt',1126,2810)
)x(region,cohort_id,tag_id)

),
final_data as (
select cohort_id,dynamic_tag_id,retailer_id,sum(percent*cntrb) as final_perc
from (
select *,
PERCENTILE_CONT(0.8) WITHIN GROUP (ORDER BY qty) over(partition by product_id,dynamic_tag_id) as perc_80,
case when qty>perc_80 then least((qty/perc_80)*80,100) else greatest((qty/perc_80)*80,40) end as percent,
nmv/sum(nmv)over(partition by retailer_id) as cntrb
from (
select 
base.cohort_id,
base.dynamic_tag_id,
so.retailer_id,
pso.product_id,
sum(purchased_item_count*basic_unit_count) as qty,
sum(pso.total_price) as nmv


FROM product_sales_order pso
JOIN sales_orders so ON so.id = pso.sales_order_id
JOIN products on products.id=pso.product_id
JOIN brands on products.brand_id = brands.id 
JOIN categories ON products.category_id = categories.id
join base on base.retailer_id = so.retailer_id
        

WHERE   True
    AND so.created_at::date between date_trunc('month',current_date - interval '4 months') and date_trunc('month',current_date)
    AND so.sales_order_status_id not in (7,12)
    AND so.channel IN ('telesales','retailer')
    AND pso.purchased_item_count <> 0

GROUP BY ALL
)
)
group by all 
HAVING final_perc > 80
)

select tag_id,retailer_id,dt.name as tag_name
from final_data fd
join mapping m on fd.cohort_id = m.cohort_id
join dynamic_tags dt on dt.id = m.tag_id

'''
tags_recc_rets  = query_snowflake(query, columns = ['tag_id','retailer_id','tag_name'])
tags_recc_rets.columns = tags_recc_rets.columns.str.lower()
for col in tags_recc_rets.columns:
    # Convert a column to numeric only when every value parses; otherwise
    # leave it untouched (tag_name stays text). This is the explicit form of
    # the deprecated pd.to_numeric(..., errors='ignore').
    try:
        tags_recc_rets[col] = pd.to_numeric(tags_recc_rets[col])
    except (ValueError, TypeError):
        pass

In [None]:
# Attach each recommended retailer's order statuses, keep the highest status id
# per (tag, retailer), and retain only retailers whose highest status is one of
# 0, 6, 9 or 12 (0 stands for "no row in retailer_status" after the fillna).
# NOTE(review): `retailer_status` is not created in any cell visible in this
# notebook - confirm the cell that builds it still exists, otherwise this cell
# fails on a fresh kernel.
req_data = (
    tags_recc_rets
    .merge(retailer_status, on=['retailer_id'], how='left')
    .fillna(0)
    .groupby(['tag_id', 'retailer_id', 'tag_name'])['sales_order_status_id']
    .max()
    .reset_index()
    .loc[lambda frame: frame['sales_order_status_id'].isin([0, 6, 9, 12])]
)
req_data

Unnamed: 0,tag_id,retailer_id,tag_name,sales_order_status_id
2,2807,384,Cairo_quantity_discount,6.0
3,2807,387,Cairo_quantity_discount,6.0
4,2807,439,Cairo_quantity_discount,6.0
5,2807,449,Cairo_quantity_discount,0.0
6,2807,491,Cairo_quantity_discount,12.0
...,...,...,...,...
16975,2812,948044,DW_quantity_discount,6.0
16976,2812,948116,DW_quantity_discount,6.0
16977,2812,948584,DW_quantity_discount,6.0
16978,2812,948641,DW_quantity_discount,6.0


In [7]:
def get_secret(secret_name, region_name="us-east-1"):
    """Fetch the value of a secret from AWS Secrets Manager.

    :param secret_name: identifier passed to the API as ``SecretId``
    :param region_name: AWS region of the Secrets Manager endpoint; defaults
        to ``us-east-1``
    :return: the ``SecretString`` text when the response carries one,
        otherwise the base64-decoded ``SecretBinary`` payload
    :raises Exception: any error raised by ``get_secret_value`` (decryption
        failure, missing secret, access denied, ...) propagates unchanged
    """
    # Create a Secrets Manager client
    session = boto3.session.Session()
    client = session.client(
        service_name='secretsmanager',
        region_name=region_name
    )

    # Deliberately no try/except: every failure is re-raised to the caller
    # as-is, so a failed lookup can never silently turn into a None return.
    # See https://docs.aws.amazon.com/secretsmanager/latest/apireference/API_GetSecretValue.html
    get_secret_value_response = client.get_secret_value(SecretId=secret_name)

    # Depending on whether the secret is a string or binary, one of these
    # fields will be populated.
    if 'SecretString' in get_secret_value_response:
        return get_secret_value_response['SecretString']
    return base64.b64decode(get_secret_value_response['SecretBinary'])

In [8]:
# Load the pricing-API secret (a JSON document) and unpack the Egypt credentials.
# `username` and `password` are read as globals by get_access_token; `secret`
# is the client secret handed to upload_dynamic_tags.
pricing_api_secret = json.loads(get_secret("prod/pricing/api/"))
username, password, secret = [
    pricing_api_secret[key]
    for key in ("egypt_username", "egypt_password", "egypt_secret")
]

In [9]:
def get_access_token(url, client_id, client_secret):
    """
    get_access_token function takes three parameters and returns a session token
    to connect to MaxAB APIs

    The request uses the password grant. The user credentials are not
    parameters: they are read from the notebook-level ``username`` and
    ``password`` variables, which must be defined before the call.

    :param url: production MaxAB token URL
    :param client_id: client ID (sent as the basic-auth user)
    :param client_secret: client secret (sent as the basic-auth password)
    :return: session token (the ``access_token`` field of the JSON response)
    :raises requests.HTTPError: when the token endpoint answers with an error status
    :raises requests.Timeout: when the endpoint does not answer within 30 seconds
    """
    response = requests.post(
        url,
        data={"grant_type": "password",
              "username": username,
              "password": password},
        auth=(client_id, client_secret),
        # Without a timeout a stalled connection would block the notebook forever.
        timeout=30,
    )
    # Surface rejected credentials as an HTTP error instead of a KeyError on
    # the missing "access_token" field.
    response.raise_for_status()
    return response.json()["access_token"]

In [10]:
import glob

def clear_directory(directory):
    """Remove every regular file directly inside ``directory``.

    Sub-directories (and whatever they contain) are left alone, and the
    directory itself is kept. Each removed path is reported on stdout.
    """
    pattern = os.path.join(directory, '*')
    for path in filter(os.path.isfile, glob.glob(pattern)):
        os.remove(path)
        print(f"Deleted: {path}")

In [11]:
import os
import time
import base64
import requests
import pandas as pd

def upload_dynamic_tags(req_data, secret, timeout=120):
    """Upload one retailer list per dynamic tag to the dynamic-tags API.

    For every distinct (tag_id, tag_name) pair in ``req_data`` the function
    writes that tag's unique retailer ids to ``dynamic_tags/tag_<id>_list.xlsx``,
    base64-encodes the file, requests a fresh access token and PUTs the payload
    to the internal-dynamic-tags endpoint for that tag. Progress and a final
    success/failure summary are printed; nothing is returned. A failure on one
    tag is counted and does not stop the remaining tags.

    :param req_data: DataFrame with ``tag_id``, ``tag_name`` and ``retailer_id`` columns
    :param secret: client secret passed to ``get_access_token``
    :param timeout: seconds to wait for each upload request; an upload that
        exceeds it is counted as failed instead of blocking the run
    """
    os.makedirs('dynamic_tags', exist_ok=True)

    # Get unique tags
    unique_tags = req_data[['tag_id', 'tag_name']].drop_duplicates()
    total = len(unique_tags)

    print(f"Found {total} unique tags to process\n")

    success_count = 0
    fail_count = 0

    for idx, (tag_id, tag_name) in enumerate(unique_tags.itertuples(index=False), 1):
        # Convert to Python native types so they serialise cleanly in the JSON payload
        tag_id = int(tag_id)
        tag_name = str(tag_name)

        print(f"[{idx}/{total}] Processing tag {tag_id}: {tag_name}")

        # Get data for this tag
        tag_data = req_data[req_data['tag_id'] == tag_id]
        to_upload = tag_data[['retailer_id']].drop_duplicates()

        print(f"  - Retailers: {len(to_upload)}")

        # Save to Excel
        file_path = f'dynamic_tags/tag_{tag_id}_list.xlsx'
        to_upload.to_excel(file_path, index=False, sheet_name='Sheet1')
        print(f"  ✓ Saved: {file_path}")

        # Read the workbook back as bytes; the API takes it base64-encoded
        with open(file_path, 'rb') as f:
            file_base64 = base64.b64encode(f.read()).decode('utf-8')

        # Get token (a new one is requested for every tag)
        try:
            token = get_access_token(
                'https://sso.maxab.info/auth/realms/maxab/protocol/openid-connect/token',
                'main-system-externals',
                secret
            )
        except Exception as e:
            print(f"  ✗ Failed to get token: {e}\n")
            fail_count += 1
            continue

        # Upload to API
        url = f"https://api.maxab.info/commerce/api/admins/v1/internal-dynamic-tags/{tag_id}"

        headers = {
            'accept': 'application/json',
            'Content-Type': 'application/json',
            'Authorization': f'Bearer {token}'
        }

        payload = {
            "basic_info": {
                "id": tag_id,
                "type": 1,
                "method": 2,
                "name": tag_name
            },
            "file": file_base64,
            "file_extension": "xlsx"
        }

        try:
            # A timeout keeps one stalled request from hanging the whole run;
            # the resulting exception is counted as a failed upload below.
            response = requests.put(url, headers=headers, json=payload, timeout=timeout)

            if response.status_code in [200, 201, 204]:
                print(f"  ✓ Upload successful: {response.status_code}")
                success_count += 1
            else:
                print(f"  ✗ Upload failed: {response.status_code}")
                print(f"    Error: {response.text}")
                fail_count += 1
        except Exception as e:
            print(f"  ✗ Request failed: {e}")
            fail_count += 1

        print()
        # Rate limiting between uploads; no need to wait after the last tag
        if idx < total:
            time.sleep(2)

    print(f"\n{'='*60}")
    print(f"Summary:")
    print(f"  Success: {success_count}")
    print(f"  Failed: {fail_count}")
    print(f"{'='*60}")

# Usage
# Delete the files already present in dynamic_tags/ first, then write a fresh
# list per tag and upload it.
clear_directory('dynamic_tags')
upload_dynamic_tags(req_data, secret)

Deleted: dynamic_tags/tag_2809_list.xlsx
Deleted: dynamic_tags/tag_2807_list.xlsx
Deleted: dynamic_tags/tag_2811_list.xlsx
Deleted: dynamic_tags/tag_2810_list.xlsx
Deleted: dynamic_tags/tag_2812_list.xlsx
Deleted: dynamic_tags/tag_2808_list.xlsx
Found 6 unique tags to process

[1/6] Processing tag 2807: Cairo_quantity_discount
  - Retailers: 3385
  ✓ Saved: dynamic_tags/tag_2807_list.xlsx
  ✓ Upload successful: 200

[2/6] Processing tag 2808: Giza_quantity_discount
  - Retailers: 2088
  ✓ Saved: dynamic_tags/tag_2808_list.xlsx
  ✓ Upload successful: 200

[3/6] Processing tag 2809: Alex_quantity_discount
  - Retailers: 1299
  ✓ Saved: dynamic_tags/tag_2809_list.xlsx
  ✓ Upload successful: 200

[4/6] Processing tag 2810: UE_quantity_discount
  - Retailers: 2929
  ✓ Saved: dynamic_tags/tag_2810_list.xlsx
  ✓ Upload successful: 200

[5/6] Processing tag 2811: DE_quantity_discount
  - Retailers: 2072
  ✓ Saved: dynamic_tags/tag_2811_list.xlsx
  ✓ Upload successful: 200

[6/6] Processing tag