# Treasure Hunt Scheduler

This script runs daily at 12 AM to process Treasure Hunt SKUs and push prices to the MaxAB API.

## Workflow
1. Read 'Treasure Hunt' Google Sheet (Sheet6)
2. Process SKUs - determine which are visible today
3. Handle duplicates - keep only today's entry for duplicate SKUs
4. Set remove_min=1 for SKUs not visible today
5. Push prices to cohort 61


In [1]:
# =============================================================================
# IMPORTS
# =============================================================================
import pandas as pd
import numpy as np
import json
import time
import base64
import os
from datetime import datetime
import pytz

# AWS for secrets management
import boto3
from botocore.exceptions import ClientError

# HTTP requests for API calls
import requests

# Progress bar for chunk uploads
from tqdm import tqdm

# Google Sheets integration
import gspread
from oauth2client.service_account import ServiceAccountCredentials

# Import setup_environment_2 for Google Sheets credentials
import sys
sys.path.append('..')
import setup_environment_2

# Cairo timezone object (IANA zone 'Africa/Cairo'). get_cairo_now() and
# get_cairo_today() pass it to datetime.now() so dates are evaluated in this
# zone rather than in the host machine's local timezone.
CAIRO_TZ = pytz.timezone('Africa/Cairo')

def get_cairo_now():
    """Return the current timezone-aware datetime for the Cairo timezone."""
    cairo_time = datetime.now(tz=CAIRO_TZ)
    return cairo_time

def get_cairo_today():
    """Return the current calendar date as observed in the Cairo timezone."""
    cairo_time = datetime.now(tz=CAIRO_TZ)
    return cairo_time.date()

# Timestamp and date captured once, when this cell is executed; they feed the
# startup banner printed at the bottom of the cell. The processing functions
# call get_cairo_now() / get_cairo_today() again when they run instead of
# reusing these snapshots.
CAIRO_NOW = get_cairo_now()
TODAY = get_cairo_today()

# =============================================================================
# CONFIGURATION
# =============================================================================
# Maximum number of rows written to each chunk file before upload.
CHUNK_SIZE_SPECIAL = 2000   # Chunk size for cohort 61
# Directory that receives the full (un-chunked) pricing sheet.
UPLOAD_DIR = 'uploads'
# Directory that receives the per-chunk files that are actually posted.
MANUAL_DIR = 'manual'

# Google Sheets configuration: spreadsheet title and worksheet name that
# load_treasure_hunt_data() opens.
TREASURE_HUNT_GSHEET = 'Treasure Hunt'
TREASURE_HUNT_SHEET = 'Sheet6'

# Fixed cohort for Treasure Hunt; used in the pricing endpoint URL and in the
# generated file names.
TREASURE_HUNT_COHORT = 61

# Startup banner so the cell output records when the definitions were loaded.
print(f"Treasure Hunt Scheduler loaded at {CAIRO_NOW.strftime('%Y-%m-%d %H:%M:%S')} Cairo time")
print(f"Today's date: {TODAY}")


Treasure Hunt Scheduler loaded at 2026-02-09 17:40:53 Cairo time
Today's date: 2026-02-09


In [2]:
# =============================================================================
# AWS & API FUNCTIONS
# =============================================================================

def get_secret(secret_name: str) -> str:
    """
    Fetch the value stored under ``secret_name`` in AWS Secrets Manager.

    The lookup is made against the ``us-east-1`` region. When the service
    rejects the request with one of the recognised error codes, a short
    explanation is printed; the ``ClientError`` is re-raised in every case.

    Returns:
        The ``SecretString`` field when the response has one, otherwise the
        base64-decoded ``SecretBinary`` field.
    """
    known_errors = {
        'DecryptionFailureException': "Can't decrypt secret using provided KMS key",
        'InternalServiceErrorException': "Server-side error occurred",
        'InvalidParameterException': "Invalid parameter value provided",
        'InvalidRequestException': "Invalid request for current resource state",
        'ResourceNotFoundException': "Requested resource not found"
    }

    secrets_client = boto3.session.Session().client(
        service_name='secretsmanager',
        region_name="us-east-1",
    )

    try:
        secret_payload = secrets_client.get_secret_value(SecretId=secret_name)
    except ClientError as err:
        explanation = known_errors.get(err.response['Error']['Code'])
        if explanation is not None:
            print(f"AWS Error: {explanation}")
        raise

    # Binary secrets are only consulted when no string value is present.
    if 'SecretString' not in secret_payload:
        return base64.b64decode(secret_payload['SecretBinary'])
    return secret_payload['SecretString']


def get_access_token(url: str, client_id: str, client_secret: str) -> str:
    """
    Get OAuth2 access token for MaxAB API authentication.

    Sends a password-grant request to ``url``. The username and password come
    from the module-level ``API_USERNAME`` / ``API_PASSWORD`` values, and the
    client id/secret are sent as HTTP basic auth.

    Args:
        url: Token endpoint to post to.
        client_id: OAuth client identifier.
        client_secret: OAuth client secret.

    Returns:
        The ``access_token`` field of the JSON response.

    Raises:
        requests.HTTPError: If the endpoint answers with a 4xx/5xx status.
        requests.Timeout: If the endpoint does not respond in time.
    """
    response = requests.post(
        url,
        data={
            "grant_type": "password",
            "username": API_USERNAME,
            "password": API_PASSWORD
        },
        auth=(client_id, client_secret),
        # Without a timeout requests waits indefinitely, which would hang an
        # unattended scheduled run.
        timeout=30,
    )
    # Surface rejected credentials / server errors as an HTTP error carrying
    # the status code, instead of a KeyError on the missing token field.
    response.raise_for_status()
    return response.json()["access_token"]


def _get_api_token() -> str:
    """
    Request a new access token from the token endpoint at sso.maxab.info.

    Uses the fixed ``main-system-externals`` client id together with the
    module-level ``API_SECRET``; nothing is cached, so each call performs a
    new request through ``get_access_token``.
    """
    token_endpoint = 'https://sso.maxab.info/auth/realms/maxab/protocol/openid-connect/token'
    sso_client_id = 'main-system-externals'
    return get_access_token(token_endpoint, sso_client_id, API_SECRET)


def post_prices(cohort_id: int, file_name: str) -> requests.Response:
    """
    Upload a pricing Excel sheet to MaxAB API for a specific cohort.

    A new API token is requested on every call. The file is sent as the
    multipart field ``sheet`` and is closed again once the request finishes.

    Args:
        cohort_id: Cohort whose pricing endpoint receives the sheet.
        file_name: Path of the ``.xlsx`` file to upload; also sent as the
            multipart file name.

    Returns:
        The raw ``requests.Response``; callers inspect its content to decide
        whether the upload succeeded.
    """
    token = _get_api_token()
    url = f"https://api.maxab.info/main-system/api/admin-portal/cohorts/{cohort_id}/pricing"
    headers = {'Authorization': f'bearer {token}'}

    # Open the sheet in a context manager so the handle is released even if
    # the request raises.
    with open(file_name, 'rb') as sheet_file:
        files = [('sheet', (file_name, sheet_file,
                  'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'))]
        return requests.post(url, headers=headers, data={}, files=files)


In [3]:
# =============================================================================
# API CREDENTIALS INITIALIZATION
# =============================================================================
# Load the pricing API credentials once from the "prod/pricing/api/" secret
# (parsed as JSON). get_access_token() and _get_api_token() read these
# module-level names at call time, so this cell must run before any upload.
pricing_api_secret = json.loads(get_secret("prod/pricing/api/"))
API_USERNAME = pricing_api_secret["egypt_username"]
API_PASSWORD = pricing_api_secret["egypt_password"]
API_SECRET = pricing_api_secret["egypt_secret"]

print("‚úì API credentials loaded successfully")

# =============================================================================
# GOOGLE SHEETS CLIENT INITIALIZATION
# =============================================================================
# OAuth scopes requested for the service account (Sheets and Drive URLs).
GSHEET_SCOPE = [
    "https://spreadsheets.google.com/feeds",
    'https://www.googleapis.com/auth/spreadsheets',
    "https://www.googleapis.com/auth/drive.file",
    "https://www.googleapis.com/auth/drive"
]

# Build service-account credentials from the JSON key stored in the
# "prod/maxab-sheets" secret.
# NOTE(review): this secret is fetched with setup_environment_2.get_secret,
# not the get_secret defined in this file - confirm that is intentional.
gsheet_creds = ServiceAccountCredentials.from_json_keyfile_dict(
    json.loads(setup_environment_2.get_secret("prod/maxab-sheets")), 
    GSHEET_SCOPE
)
# Authorized gspread client; load_treasure_hunt_data() uses this global.
gsheet_client = gspread.authorize(gsheet_creds)

print("‚úì Google Sheets client initialized")


‚úì API credentials loaded successfully
‚úì Google Sheets client initialized


In [4]:
# =============================================================================
# HELPER FUNCTIONS
# =============================================================================


def load_treasure_hunt_data() -> pd.DataFrame:
    """
    Read the configured Treasure Hunt worksheet into a DataFrame.

    Opens the spreadsheet named by ``TREASURE_HUNT_GSHEET`` through the
    module-level ``gsheet_client`` and pulls every record of the worksheet
    named by ``TREASURE_HUNT_SHEET``. Progress is reported with ``print``.

    Returns:
        The sheet records as a DataFrame, or a brand-new empty DataFrame when
        the sheet yields no data.
    """
    print(f"Loading data from '{TREASURE_HUNT_GSHEET}' sheet '{TREASURE_HUNT_SHEET}'...")

    spreadsheet = gsheet_client.open(TREASURE_HUNT_GSHEET)
    records = spreadsheet.worksheet(TREASURE_HUNT_SHEET).get_all_records()
    frame = pd.DataFrame(records)

    if not frame.empty:
        print(f"  ‚úì Loaded {len(frame)} rows")
        print(f"  Columns: {list(frame.columns)}")
        return frame

    print("  ‚ö†Ô∏è No data in Treasure Hunt sheet")
    return pd.DataFrame()


In [14]:
# =============================================================================
# TREASURE HUNT DATA PROCESSING
# =============================================================================

def process_treasure_hunt_skus(treasure_skus: pd.DataFrame, today=None) -> pd.DataFrame:
    """
    Process Treasure Hunt SKUs to determine visibility.

    Logic:
    1. Parse ``created_at`` (day and month name, no year) using the year of
       ``today``; values that cannot be parsed become NaT.
    2. Rows whose parsed date equals ``today`` are kept as-is
       (``remove_min`` stays NaN, i.e. visible).
    3. SKUs that have no row for ``today`` are reduced to their most recent
       row and flagged with ``remove_min = 1`` (not visible).
    4. For SKUs that do have a row for ``today``, rows for other dates are
       dropped.

    Args:
        treasure_skus: Raw data from Google Sheet. Must contain the columns
            ``sku`` and ``created_at``.
        today: Optional ``datetime.date`` to evaluate visibility against.
            Defaults to the current Cairo date; pass an explicit date for
            backfills or deterministic runs.

    Returns:
        Processed DataFrame ready for price push. It carries the input
        columns plus ``created_date``, ``ind``, ``remove_min`` and
        ``is_today``. An empty DataFrame is returned for empty input.
    """
    if treasure_skus.empty:
        return pd.DataFrame()

    # Read the clock once so the year used for parsing and the date used for
    # matching can never disagree.
    today_cairo = get_cairo_today() if today is None else today

    df = treasure_skus.copy()

    # The sheet stores dates without a year, so the year of `today` is
    # appended before parsing. Unparseable values are coerced to NaT.
    df['created_at'] = pd.to_datetime(
        df['created_at'].astype(str) + f'-{today_cairo.year}',
        format='%d-%B-%Y',
        errors='coerce',
    )
    df['created_date'] = df['created_at'].dt.date

    # Initialize columns
    df['ind'] = 1
    df['remove_min'] = np.nan

    print(f"\nProcessing {len(df)} treasure hunt entries...")
    print(f"Today (Cairo): {today_cairo}")
    print(f"Unique SKUs: {df['sku'].nunique()}")

    # created_date is an object column; taking min()/max() while it still
    # contains NaT can raise TypeError, so summarise the valid dates only.
    valid_dates = df['created_date'].dropna()
    if valid_dates.empty:
        print("Date range: n/a (no parsable created_at values)")
    else:
        print(f"Date range: {valid_dates.min()} to {valid_dates.max()}")

    unparsed_count = len(df) - len(valid_dates)
    if unparsed_count:
        print(f"WARNING: {unparsed_count} row(s) have an unparseable created_at "
              "and are treated as not scheduled for today")

    # Identify rows scheduled for today
    is_today = df['created_date'] == today_cairo
    df['is_today'] = is_today

    skus_visible_today = df.loc[is_today, 'sku'].unique()
    print(f"SKUs visible today: {len(skus_visible_today)}")

    # SKUs with a row for today: keep only today's row(s)
    df_today = df[is_today].copy()

    # SKUs without a row for today: keep them, but flagged for hiding
    df_not_today = df[~df['sku'].isin(skus_visible_today)].copy()

    # For non-today SKUs, keep only the most recent entry per SKU. A stable
    # sort makes the choice deterministic when dates tie (sheet order wins).
    if not df_not_today.empty:
        df_not_today = df_not_today.sort_values(
            'created_at', ascending=False, kind='mergesort'
        )
        df_not_today = df_not_today.drop_duplicates(subset=['sku'], keep='first')
        df_not_today['remove_min'] = 1

    # Combine the results
    result = pd.concat([df_today, df_not_today], ignore_index=True)

    print(f"\nAfter processing:")
    print(f"  Total entries: {len(result)}")
    print(f"  Visible today (remove_min=NaN): {len(result[result['remove_min'].isna()])}")
    print(f"  Not visible (remove_min=1): {len(result[result['remove_min'] == 1])}")

    return result


In [19]:
# =============================================================================
# PUSH PRICES FUNCTION (Simplified for Treasure Hunt)
# =============================================================================

MODE_TESTING = 'testing'
MODE_LIVE = 'live'


def _build_treasure_hunt_sheet(df_prices: pd.DataFrame) -> pd.DataFrame:
    """Shape processed rows into the column layout of the pricing upload sheet."""
    out = df_prices[['product_id', 'sku', 'pu_id', 'new_price', 'ind', 'remove_min']].copy()
    out.columns = ['Product ID', 'Product Name', 'Packing Unit ID', 'Price', 'ind', 'remove_min']

    # Blank or text cells would make the numeric comparison below raise;
    # coerce them to NaN so they are filtered out like any other invalid price.
    out['Price'] = pd.to_numeric(out['Price'], errors='coerce')

    # Set visibility based on remove_min flag
    # remove_min = NaN -> Visible (YES)
    # remove_min = 1 -> Not visible (NO)
    out['Visibility (YES/NO)'] = 'YES'
    out.loc[out['remove_min'] == 1, 'Visibility (YES/NO)'] = 'NO'

    # Drop helper columns and duplicates
    out = out.drop(columns=['ind', 'remove_min']).drop_duplicates()

    # Add required empty columns for API
    out['Execute At (format:dd/mm/yyyy HH:mm)'] = None
    out['Tags'] = None

    # Filter out invalid prices (NaN compares False and is dropped too)
    return out[out['Price'] > 1].reset_index(drop=True)


def push_treasure_hunt_prices(df_prices: pd.DataFrame,
                               mode: str = 'testing') -> dict:
    """
    Push Treasure Hunt prices to MaxAB API (cohort 61).

    This is a simplified version of push_prices specifically for Treasure Hunt:
    - No packing unit expansion (products are already at correct level)
    - Fixed cohort ID = 61
    - Visibility based on remove_min flag

    The full sheet is always written to ``UPLOAD_DIR``. In live mode it is
    additionally split into chunks of ``CHUNK_SIZE_SPECIAL`` rows, written to
    ``MANUAL_DIR`` and posted one by one; uploading stops at the first chunk
    the API does not acknowledge as successful.

    Args:
        df_prices: DataFrame with processed treasure hunt data
                   Required columns: product_id, sku, pu_id, new_price, ind,
                   remove_min
        mode: 'testing' or 'live'. Any other value falls back to 'testing'.

    Returns:
        dict with upload results: ``total_received``, ``pushed``, ``failed``,
        ``timestamp``, ``mode`` and ``cohort``. In testing mode ``pushed`` is
        the number of rows that would have been uploaded. ``failed`` counts
        the rows of the chunk that failed; chunks after it are not attempted
        and are not counted.
    """
    cohort = TREASURE_HUNT_COHORT

    # Validate mode
    if mode not in [MODE_TESTING, MODE_LIVE]:
        print(f"‚ö†Ô∏è Invalid mode '{mode}'. Using 'testing' mode.")
        mode = MODE_TESTING

    print(f"\n{'üß™' if mode == MODE_TESTING else 'üöÄ'} MODE: {mode.upper()}")
    if mode == MODE_TESTING:
        print("   Files will be prepared but NOT uploaded to API")
    else:
        print("   Files will be prepared AND uploaded to API")

    # Initialize result tracking
    result = {
        'total_received': len(df_prices),
        'pushed': 0,
        'failed': 0,
        'timestamp': get_cairo_now().strftime('%Y-%m-%d %H:%M:%S'),
        'mode': mode,
        'cohort': cohort
    }

    if df_prices.empty:
        print("‚ö†Ô∏è No data to push")
        return result

    print(f"\n{'='*60}")
    print(f"PUSH TREASURE HUNT PRICES - Cohort {cohort}")
    print(f"{'='*60}")
    print(f"Total entries: {len(df_prices)}")

    # Ensure output directories exist
    os.makedirs(UPLOAD_DIR, exist_ok=True)
    os.makedirs(MANUAL_DIR, exist_ok=True)

    # Prepare output DataFrame with API-expected columns
    out = _build_treasure_hunt_sheet(df_prices)

    if len(out) == 0:
        print("  No valid prices to push")
        return result

    print(f"\nVisibility summary:")
    print(f"  Visible (YES): {len(out[out['Visibility (YES/NO)'] == 'YES'])}")
    print(f"  Hidden (NO): {len(out[out['Visibility (YES/NO)'] == 'NO'])}")

    # Save full file for reference
    file_name_ = f'{UPLOAD_DIR}/treasure_hunt_{cohort}.xlsx'
    out.to_excel(file_name_, index=False)
    print(f"\n  Saved: {file_name_} ({len(out)} rows)")
    # NOTE(review): fixed pause kept from the original flow; its purpose is
    # not evident from this file - confirm whether it is still needed.
    time.sleep(2)

    # In testing mode, skip the actual API upload
    if mode == MODE_TESTING:
        print(f"  üß™ [TESTING] Would upload {len(out)} prices (skipped)")
        result['pushed'] = len(out)
        print(f"\n{'='*60}")
        print("üß™ TESTING MODE COMPLETE - NO PRICES WERE UPLOADED")
        print(f"{'='*60}")
        return result

    # Split into chunks for API upload
    chunk_size = CHUNK_SIZE_SPECIAL
    chunks = [out[i:i + chunk_size] for i in range(0, len(out), chunk_size)]
    print(f"  Split into {len(chunks)} chunks (size: {chunk_size})")

    # Save chunks, remembering number and row count so the upload loop does
    # not have to parse file names or re-read the files from disk.
    chunk_files = []
    for chunk_num, chunk in tqdm(enumerate(chunks, start=1), total=len(chunks),
                                 desc="  Saving chunks"):
        output_file = f'{MANUAL_DIR}/treasure_hunt_{cohort}_chunk_{chunk_num}.xlsx'
        chunk.to_excel(output_file, index=False)
        chunk_files.append((chunk_num, output_file, len(chunk)))

    # Upload each chunk
    print("  Uploading...")
    total_pushed = 0
    total_failed = 0

    for chunk_num, output_file, row_count in chunk_files:
        response = post_prices(cohort, output_file)

        if '"success":true' in str(response.content).lower():
            print(f"    ‚úì Chunk {chunk_num} uploaded successfully")
            total_pushed += row_count
        else:
            print(f"    ‚úó ERROR chunk {chunk_num}")
            print(f"      Response: {response.content}")
            total_failed += row_count
            # Stop at the first failure; later chunks are left unsent.
            not_attempted = len(chunk_files) - chunk_num
            if not_attempted:
                print(f"      Stopping: {not_attempted} remaining chunk(s) were not uploaded")
            break

    result['pushed'] = total_pushed
    result['failed'] = total_failed

    print(f"\n{'='*60}")
    print("üöÄ UPLOAD COMPLETE")
    print(f"{'='*60}")
    print(f"Mode: {mode}")
    print(f"Total pushed: {total_pushed}")
    print(f"Total failed: {total_failed}")

    return result


In [20]:
# =============================================================================
# MAIN EXECUTION
# =============================================================================

def run_treasure_hunt_scheduler(mode: str = 'testing'):
    """
    Run the full Treasure Hunt pipeline: load the sheet, process it, push it.

    Args:
        mode: 'testing' or 'live'; forwarded unchanged to
            ``push_treasure_hunt_prices``.

    Returns:
        The result dict produced by ``push_treasure_hunt_prices``, or ``None``
        when either the loaded sheet or the processed data is empty.
    """
    # Clock is read at call time, not at import time
    started_at = get_cairo_now()
    run_date = get_cairo_today()

    print(f"\n{'='*60}")
    print("TREASURE HUNT SCHEDULER")
    print(f"{'='*60}")
    print(f"Execution time: {started_at.strftime('%Y-%m-%d %H:%M:%S')} Cairo")
    print(f"Today's date (Cairo): {run_date}")

    # Step 1: pull the raw rows from the Google Sheet
    sheet_rows = load_treasure_hunt_data()
    if sheet_rows.empty:
        print("\n‚ùå No data to process. Exiting.")
        return None

    # Step 2: work out which SKUs are visible today
    visibility_rows = process_treasure_hunt_skus(sheet_rows)
    if visibility_rows.empty:
        print("\n‚ùå No processed data. Exiting.")
        return None

    # Step 3: prepare the files and, in live mode, upload them
    push_summary = push_treasure_hunt_prices(visibility_rows, mode=mode)

    print(f"\n‚úì Scheduler completed")
    return push_summary


# Run in testing mode by default
# Change to mode='live' to actually push prices
# result = run_treasure_hunt_scheduler(mode='testing')


In [21]:
# Called without arguments, so the default mode ('testing') applies: files are
# prepared but nothing is uploaded to the API.
run_treasure_hunt_scheduler()


TREASURE HUNT SCHEDULER
Execution time: 2026-02-09 17:51:06 Cairo
Today's date (Cairo): 2026-02-09
Loading data from 'Treasure Hunt' sheet 'Sheet6'...
  ‚úì Loaded 17 rows
  Columns: ['product_id', 'sku', 'supplier', 'Cat', 'brand', 'created_at', 'pu', 'pu_id', 'new_price']

Processing 17 treasure hunt entries...
Today (Cairo): 2026-02-09
Unique SKUs: 1
Date range: 2026-02-10 to 2026-02-26
SKUs visible today: 0

After processing:
  Total entries: 1
  Visible today (remove_min=NaN): 0
  Not visible (remove_min=1): 1

üß™ MODE: TESTING
   Files will be prepared but NOT uploaded to API

PUSH TREASURE HUNT PRICES - Cohort 61
Total entries: 1
  No valid prices to push

‚úì Scheduler completed


{'total_received': 1,
 'pushed': 0,
 'failed': 0,
 'timestamp': '2026-02-09 17:51:07',
 'mode': 'testing',
 'cohort': 61}