In [1]:
%%capture

# Upgrade pip
!pip install --upgrade pip
# Connectivity
!pip install psycopg2-binary  # PostgreSQL adapter
# !pip install snowflake-connector-python  # Snowflake connector
!pip install snowflake-connector-python==3.15.0 # Snowflake connector Older Version
!pip install snowflake-sqlalchemy  # Snowflake SQLAlchemy connector
!pip install warnings # Warnings management
# !pip install pyarrow # Serialization
!pip install keyring==23.11.0 # Key management
!pip install sqlalchemy==1.4.46 # SQLAlchemy
!pip install requests # HTTP requests
!pip install boto3 # AWS SDK
# !pip install slackclient # Slack API
!pip install oauth2client # Google Sheets API
!pip install gspread==5.9.0 # Google Sheets API
!pip install gspread_dataframe # Google Sheets API
!pip install google.cloud # Google Cloud
# Data manipulation and analysis
!pip install polars
!pip install pandas==2.2.1
!pip install numpy
# !pip install fastparquet
!pip install openpyxl # Excel file handling
!pip install xlsxwriter # Excel file handling
# Linear programming
!pip install pulp
# Date and time handling
!pip install --upgrade datetime
!pip install python-time
!pip install --upgrade pytz
# Progress bar
!pip install tqdm
# Database data types
!pip install db-dtypes
# Geospatial data handling
# !pip install geopandas
# !pip install shapely
# !pip install fiona
# !pip install haversine
# Plotting

# Modeling
!pip install statsmodels
!pip install scikit-learn

!pip install import-ipynb

In [2]:
# =============================================================================
# STANDARD LIBRARY IMPORTS
# =============================================================================
import os
import json
import time
import base64
import calendar
import warnings
from pathlib import Path
from datetime import datetime, date, timedelta

# =============================================================================
# THIRD-PARTY IMPORTS
# =============================================================================
import numpy as np
import pandas as pd
import pytz
import requests
import gspread
import boto3
import snowflake.connector
from tqdm import tqdm
from requests import get
from botocore.exceptions import ClientError
from oauth2client.service_account import ServiceAccountCredentials

# =============================================================================
# LOCAL IMPORTS & ENVIRONMENT SETUP
# =============================================================================
import setup_environment_2
import importlib
import import_ipynb

# Suppress every Python warning for the rest of the kernel session.
# NOTE(review): this also hides deprecation warnings raised by the libraries used below — confirm this is intended.
warnings.filterwarnings("ignore")
# Reload so that edits made to setup_environment_2 after its first import take effect in this kernel.
importlib.reload(setup_environment_2)
# initialize_env() is defined in setup_environment_2 (not shown in this notebook);
# presumably it populates the SNOWFLAKE_* environment variables read by snowflake_query — TODO confirm.
setup_environment_2.initialize_env()

  warn_incompatible_dep(


/home/ec2-user/.Renviron
/home/ec2-user/service_account_key.json


In [3]:
def snowflake_query(country, query, warehouse=None, columns=None, conn=None):
    """
    Execute a query against Snowflake and return the results as a DataFrame.

    Args:
        country: Country identifier (e.g., "Egypt"). Kept for call-site
            compatibility; it is not used to select the connection.
        query: SQL query string to execute.
        warehouse: Snowflake warehouse to run on (optional). Falls back to
            COMPUTE_WH when omitted.
        columns: Custom column names that replace the names reported by the
            cursor (optional). None or an empty sequence keeps the cursor's names.
        conn: Existing Snowflake connection to reuse (optional). A supplied
            connection is left open for the caller (note that the
            USE WAREHOUSE statement is executed on it); when omitted, a
            connection is opened from the SNOWFLAKE_* environment variables
            and closed before returning.

    Returns:
        pandas DataFrame with lower-cased column names. Rows are passed
        through np.array, so mixed-type result sets can come back as text;
        callers convert numeric columns themselves.

    Raises:
        Exception: any error raised while querying is printed and re-raised.
    """
    owns_connection = conn is None
    if owns_connection:
        con = snowflake.connector.connect(
            user     = os.environ["SNOWFLAKE_USERNAME"],
            account  = os.environ["SNOWFLAKE_ACCOUNT"],
            password = os.environ["SNOWFLAKE_PASSWORD"],
            database = os.environ["SNOWFLAKE_DATABASE"]
        )
    else:
        con = conn

    # Initialised up front so the finally-block never hits an unbound name
    # when cursor creation itself fails.
    cur = None
    try:
        cur = con.cursor()
        cur.execute(f"USE WAREHOUSE {warehouse or 'COMPUTE_WH'}")
        cur.execute(query)

        column_names = [col[0] for col in cur.description]
        results = cur.fetchall()

        # Same naming rule for empty and non-empty results, so the schema
        # does not depend on how many rows happened to come back.
        use_custom = columns is not None and len(columns) > 0
        names = list(columns) if use_custom else column_names

        if not results:
            out = pd.DataFrame(columns=[name.lower() for name in names])
        else:
            out = pd.DataFrame(np.array(results), columns=names)
            out.columns = out.columns.str.lower()

        return out

    except Exception as e:
        print(f"❌ Query error: {e}")
        raise

    finally:
        if cur is not None:
            cur.close()
        # Only close connections this function opened itself.
        if owns_connection:
            con.close()

In [12]:
# SQL that assigns each eligible retailer to a warehouse for tagging.
#   * retailer_warehouse: for every retailer, sums pso.total_price (nmv) per warehouse over
#     orders created in the last 180 days, restricted to the listed warehouse ids,
#     supply chain id 1, channels 'telesales'/'retailer', order statuses other than 7 and 12,
#     and lines with a non-zero purchased_item_count. QUALIFY keeps the warehouse(s) whose
#     nmv equals the retailer's maximum, so ties keep every tied warehouse.
#   * exec_rets: retailers to exclude, the UNION ALL of
#       - retailers whose most recent order date within the last 120 days has an order
#         with a status outside (6, 9, 12),
#       - retailers with activation = 'false',
#       - retailers attached to a dynamic tag with id > 3000 whose name matches
#         LIKE '%whole_sale%'.
#   * The final SELECT returns (warehouse_id, warehouse_name, retailer_id) for retailers
#     that are not in exec_rets.
# NOTE(review): the f-prefix is not needed — the string contains no placeholders.
query = f'''
with retailer_warehouse as (
select *,max(nmv) over(partition by retailer_id) as max_nmv 
from (
SELECT  DISTINCT
		so.retailer_id,
		pso.warehouse_id,
		w.name as warehouse_name,
		sum(pso.total_price) as nmv

FROM product_sales_order pso
JOIN sales_orders so ON so.id = pso.sales_order_id
JOIN products on products.id=pso.product_id
JOIN brands on products.brand_id = brands.id 
JOIN categories ON products.category_id = categories.id 
join SUPPLY_CHAINS sc on sc.id = categories.supply_chain_id and sc.id =1 
join warehouses w on w.id = pso.warehouse_id 

WHERE  so.created_at::date >= current_date - 180
    AND so.sales_order_status_id not in (7,12)
    AND so.channel IN ('telesales','retailer')
    AND pso.purchased_item_count <> 0
	and pso.warehouse_id in (1,8,170,236,337,339,401,501,632,703,797,962)

GROUP BY all 
)
qualify nmv = max_nmv
),
exec_rets as (
select retailer_id
from (
SELECT  DISTINCT
retailer_id,
sales_order_status_id,
created_at::date as o_date ,
max(o_date)over(partition by retailer_id) as last_order
from sales_orders so 
WHERE  so.created_at ::date >= current_date - 120
AND so.sales_order_status_id not in (7,12)
AND so.channel IN ('telesales','retailer')
qualify o_date = last_order
)
where sales_order_status_id not in (6,9,12)

union all 

select id as retailer_id 
from retailers 
where activation = 'false'

union all 

select distinct dta.TAGGABLE_ID as retailer_id
from DYNAMIC_TAGS dt 
join dynamic_taggables dta on dt.id = dta.dynamic_tag_id 
where name like '%whole_sale%'
and dt.id > 3000
)

select distinct warehouse_id,warehouse_name,retailer_id 
from retailer_warehouse 
where retailer_warehouse.retailer_id not in (select * from exec_rets)
'''
# Run the retailer/warehouse query and normalise the result.
ret_tag = snowflake_query("Egypt", query)
ret_tag.columns = ret_tag.columns.str.lower()
for col in ret_tag.columns:
    # snowflake_query builds its frame through np.array, so numeric columns can
    # arrive as text. pd.to_numeric(errors='ignore') is deprecated since pandas 2.2;
    # catching the conversion error explicitly keeps the same behaviour:
    # columns that cannot be parsed as numbers are left untouched.
    try:
        ret_tag[col] = pd.to_numeric(ret_tag[col])
    except (ValueError, TypeError):
        pass
ret_tag = ret_tag.drop_duplicates()
ret_tag

Unnamed: 0,warehouse_id,warehouse_name,retailer_id
0,170,Sharqya,215927
1,1,Mostorod,358742
2,1,Mostorod,23219
3,703,Menya Samalot,337551
4,337,El-Mahala,610923
...,...,...,...
68855,170,Sharqya,614017
68856,8,Tanta,756351
68857,236,Barageel,660665
68858,703,Menya Samalot,243637


In [13]:
# Lookup of every dynamic tag id and its name.
query = '''
select id as tag_id,name as tag_name 
from dynamic_tags 
'''
tag_info = snowflake_query("Egypt", query)
tag_info.columns = tag_info.columns.str.lower()
for col in tag_info.columns:
    # pd.to_numeric(errors='ignore') is deprecated since pandas 2.2; catching the
    # conversion error explicitly keeps the same behaviour: columns that cannot be
    # parsed as numbers (e.g. tag_name) are left untouched.
    try:
        tag_info[col] = pd.to_numeric(tag_info[col])
    except (ValueError, TypeError):
        pass

In [14]:
# Static lookup table: one row per warehouse, pairing its name and id with a tag id.
df_warehouse_mapping = pd.DataFrame(
    [
        ('Assiut FC',     501, 3301),
        ('Bani sweif',    401, 3302),
        ('Barageel',      236, 3303),
        ('El-Mahala',     337, 3304),
        ('Khorshed Alex', 797, 3305),
        ('Mansoura FC',   339, 3306),
        ('Menya Samalot', 703, 3307),
        ('Mostorod',        1, 3308),
        ('Sakkarah',      962, 3309),
        ('Sharqya',       170, 3310),
        ('Sohag',         632, 3311),
        ('Tanta',           8, 3312),
    ],
    columns=['warehouse_name', 'warehouse_id', 'tag_id'],
)

In [15]:
# Attach each retailer's tag id via the warehouse mapping (inner join on name + id),
# keep only the tag/retailer pair, then add the tag name from tag_info.
tag_data = (
    ret_tag
    .merge(df_warehouse_mapping, on=['warehouse_name', 'warehouse_id'])
    [['tag_id', 'retailer_id']]
    .merge(tag_info, on='tag_id')
)

In [16]:
# OAuth scopes requested for the Google Sheets / Drive service account.
scope = ["https://spreadsheets.google.com/feeds",
         'https://www.googleapis.com/auth/spreadsheets',
         "https://www.googleapis.com/auth/drive.file",
         "https://www.googleapis.com/auth/drive"]
creds = ServiceAccountCredentials.from_json_keyfile_dict(json.loads(setup_environment_2.get_secret("prod/maxab-sheets")), scope)
client = gspread.authorize(creds)

# Retailer ids listed on the 'Add' worksheet; they are filtered out of tag_data later.
# Loading stays best-effort (an unreadable or empty sheet yields an empty list), but the
# failure is now reported, and only Exception is caught so that KeyboardInterrupt and
# SystemExit are no longer swallowed.
try:
    add_rets = client.open('Wholesale_retailers').worksheet('Add')
    add_rets_list = pd.DataFrame(add_rets.get_all_records())
    add_rets_list = add_rets_list.retailer_id.values
except Exception as e:
    print(f"⚠️ Could not read retailer_id from 'Wholesale_retailers'/'Add'; continuing with an empty list: {e!r}")
    add_rets_list = []

In [17]:
# Remove the retailers that appear in add_rets_list from the tagging set.
is_listed = tag_data['retailer_id'].isin(add_rets_list)
tag_data = tag_data[~is_listed]

In [18]:
# Sanity check before uploading: number of distinct retailers per tag.
retailers_by_tag = tag_data.groupby('tag_name')['retailer_id']
retailers_by_tag.nunique()

tag_name
Assiut_QD_rets            3365
Banisweif_QD_rets         3144
Barageel_QD_rets          7375
El_Mahala_QD_rets         4922
Khorshed_Alex_QD_rets     6072
Mansoura_QD_rets          5497
Menya_Samalot_QD_rets     4791
Mostorod_QD_rets         12041
Sakkarah_QD_rets          6531
Sharqya_QD_rets           4738
Sohag_QD_rets             2625
Tanta_QD_rets             7759
Name: retailer_id, dtype: int64

In [19]:
def get_secret(secret_name):
    """
    Fetch a secret value from AWS Secrets Manager in region us-east-1.

    Args:
        secret_name: Identifier passed to GetSecretValue as SecretId.

    Returns:
        The 'SecretString' field when the response contains one; otherwise the
        base64-decoded content of 'SecretBinary'.

    Raises:
        ClientError: for every error returned by the GetSecretValue call.
            Previously only five specific error codes were re-raised; any
            other code (for example an access-denied error) was swallowed and
            the function silently returned None.
    """
    region_name = "us-east-1"

    # Create a Secrets Manager client
    session = boto3.session.Session()
    client = session.client(
        service_name='secretsmanager',
        region_name=region_name
    )

    # See https://docs.aws.amazon.com/secretsmanager/latest/apireference/API_GetSecretValue.html
    # No error code is handled locally, so every ClientError is re-raised as-is.
    try:
        get_secret_value_response = client.get_secret_value(SecretId=secret_name)
    except ClientError:
        raise

    # Depending on whether the secret is a string or binary, one of these fields will be populated.
    if 'SecretString' in get_secret_value_response:
        return get_secret_value_response['SecretString']
    return base64.b64decode(get_secret_value_response['SecretBinary'])

In [20]:
# Load the pricing API credentials and expose the Egypt entries as module-level names;
# get_access_token reads `username` and `password` from here.
pricing_api_secret = json.loads(get_secret("prod/pricing/api/"))
username, password, secret = (
    pricing_api_secret[field]
    for field in ("egypt_username", "egypt_password", "egypt_secret")
)

In [21]:
def get_access_token(url, client_id, client_secret, timeout=30):
    """
    Request a session token for the MaxAB APIs using the password grant.

    The user credentials are not parameters: they are read from the
    module-level `username` and `password` variables, which must be defined
    before this function is called.

    :param url: production MaxAB token URL
    :param client_id: client ID (sent as the basic-auth user)
    :param client_secret: client secret (sent as the basic-auth password)
    :param timeout: seconds to wait for the token endpoint before giving up
    :return: session token (the "access_token" field of the JSON response)
    :raises requests.HTTPError: when the endpoint answers with a 4xx/5xx status
    :raises KeyError: when a successful response carries no "access_token"
    """
    response = requests.post(
        url,
        data={"grant_type": "password",
              "username": username,
              "password": password},
        auth=(client_id, client_secret),
        timeout=timeout,
    )
    # Surface rejected credentials / server errors as an HTTP error instead of
    # a misleading KeyError on the missing "access_token" field.
    response.raise_for_status()
    return response.json()["access_token"]

In [22]:
import glob

def clear_directory(directory):
    """
    Remove the regular files directly inside `directory`, leaving the
    directory itself and any sub-directories in place.

    Only entries matched by the `*` glob pattern are considered. Each removed
    path is printed.
    """
    pattern = os.path.join(directory, '*')
    for entry in filter(os.path.isfile, glob.glob(pattern)):
        os.remove(entry)
        print(f"Deleted: {entry}")

In [23]:
import os
import time
import base64
import requests
import pandas as pd

def upload_dynamic_tags(req_data, secret, directory, request_timeout=120):
    """
    Upload one retailer list per dynamic tag to the internal dynamic-tags API.

    For every distinct (tag_id, tag_name) pair in `req_data`, the tag's unique
    retailer ids are written to `<directory>/tag_<tag_id>_list.xlsx`; the file
    is base64-encoded and sent with an HTTP PUT to the tag's endpoint.
    Progress and a final success/failure summary are printed; nothing is
    returned. A token or upload failure is counted and the loop moves on to
    the next tag.

    Args:
        req_data: DataFrame with 'tag_id', 'tag_name' and 'retailer_id' columns.
        secret: Client secret passed to get_access_token.
        directory: Folder for the generated Excel files; created if missing.
        request_timeout: Seconds to wait for the upload request. Without a
            timeout a stalled connection would block the whole run; a timed-out
            upload is reported and counted as failed.
    """
    os.makedirs(directory, exist_ok=True)

    # Get unique tags
    unique_tags = req_data[['tag_id', 'tag_name']].drop_duplicates()
    total_tags = len(unique_tags)

    print(f"Found {total_tags} unique tags to process\n")

    success_count = 0
    fail_count = 0

    for idx, (tag_id, tag_name) in enumerate(unique_tags.itertuples(index=False), 1):
        # Convert to Python native types so they serialise cleanly into the JSON payload
        tag_id = int(tag_id)
        tag_name = str(tag_name)

        print(f"[{idx}/{total_tags}] Processing tag {tag_id}: {tag_name}")

        # Distinct retailers belonging to this tag
        tag_rows = req_data[req_data['tag_id'] == tag_id]
        to_upload = tag_rows[['retailer_id']].drop_duplicates()

        print(f"  - Retailers: {len(to_upload)}")

        # Save to Excel
        file_path = f'{directory}/tag_{tag_id}_list.xlsx'
        to_upload.to_excel(file_path, index=False, sheet_name='Sheet1')
        print(f"  ✓ Saved: {file_path}")

        # The API takes the workbook as a base64 string inside the JSON body
        with open(file_path, 'rb') as f:
            file_base64 = base64.b64encode(f.read()).decode('utf-8')

        # A fresh token is requested for every tag
        try:
            token = get_access_token(
                'https://sso.maxab.info/auth/realms/maxab/protocol/openid-connect/token',
                'main-system-externals',
                secret
            )
        except Exception as e:
            print(f"  ✗ Failed to get token: {e}\n")
            fail_count += 1
            continue

        # Upload to API
        url = f"https://api.maxab.info/commerce/api/admins/v1/internal-dynamic-tags/{tag_id}"

        headers = {
            'accept': 'application/json',
            'Content-Type': 'application/json',
            'Authorization': f'Bearer {token}'
        }

        # NOTE(review): the meaning of the "type" and "method" codes is defined
        # by the API and is not visible in this notebook.
        payload = {
            "basic_info": {
                "id": tag_id,
                "type": 1,
                "method": 2,
                "name": tag_name
            },
            "file": file_base64,
            "file_extension": "xlsx"
        }

        try:
            response = requests.put(url, headers=headers, json=payload, timeout=request_timeout)

            if response.status_code in [200, 201, 204]:
                print(f"  ✓ Upload successful: {response.status_code}")
                success_count += 1
            else:
                print(f"  ✗ Upload failed: {response.status_code}")
                print(f"    Error: {response.text}")
                fail_count += 1
        except Exception as e:
            print(f"  ✗ Request failed: {e}")
            fail_count += 1

        print()
        time.sleep(2)  # Rate limiting

    print(f"\n{'='*60}")
    print(f"Summary:")
    print(f"  Success: {success_count}")
    print(f"  Failed: {fail_count}")
    print(f"{'='*60}")

# Usage: empty the output folder first, then generate and upload one file per tag.
output_dir = 'QD_TAGS'
clear_directory(output_dir)
upload_dynamic_tags(tag_data, secret, output_dir)

Deleted: QD_TAGS/tag_3304_list.xlsx
Deleted: QD_TAGS/tag_3307_list.xlsx
Deleted: QD_TAGS/tag_3305_list.xlsx
Deleted: QD_TAGS/tag_3312_list.xlsx
Deleted: QD_TAGS/tag_3308_list.xlsx
Deleted: QD_TAGS/tag_3310_list.xlsx
Deleted: QD_TAGS/tag_3311_list.xlsx
Deleted: QD_TAGS/tag_3301_list.xlsx
Deleted: QD_TAGS/tag_3306_list.xlsx
Deleted: QD_TAGS/tag_3302_list.xlsx
Deleted: QD_TAGS/tag_3303_list.xlsx
Deleted: QD_TAGS/tag_3309_list.xlsx
Found 12 unique tags to process

[1/12] Processing tag 3310: Sharqya_QD_rets
  - Retailers: 4738
  ✓ Saved: QD_TAGS/tag_3310_list.xlsx
  ✓ Upload successful: 200

[2/12] Processing tag 3308: Mostorod_QD_rets
  - Retailers: 12041
  ✓ Saved: QD_TAGS/tag_3308_list.xlsx
  ✓ Upload successful: 200

[3/12] Processing tag 3307: Menya_Samalot_QD_rets
  - Retailers: 4791
  ✓ Saved: QD_TAGS/tag_3307_list.xlsx
  ✓ Upload successful: 200

[4/12] Processing tag 3304: El_Mahala_QD_rets
  - Retailers: 4922
  ✓ Saved: QD_TAGS/tag_3304_list.xlsx
  ✓ Upload successful: 200

[5/12