In [0]:
# =============================================================================
# pipeline_config.py
# Search Log Analysis Pipeline | Central Configuration
#
# ALL paths, settings, and constants live here.
# Every other notebook imports from this file.
# To switch storage (e.g. to ADLS Gen 2), change the storage paths in section 2 (VOLUME_BASE_PATH and related).
#
# NOTE: Using Unity Catalog Volumes instead of DBFS (FileStore)
# =============================================================================

# -----------------------------------------------------------------------------
# 1. UNITY CATALOG SETTINGS
#    Catalog → Schema → Volume (3 level namespace in Databricks)
# -----------------------------------------------------------------------------

# The three names below form the Unity Catalog namespace that every path and
# table name in this file is derived from.
CATALOG_NAME = "olaride"        # top level - like a database server
SCHEMA_NAME  = "pipeline"       # mid level - like a database
VOLUME_NAME  = "raw_data"       # bottom level - like a folder

# -----------------------------------------------------------------------------
# 2. STORAGE PATHS
# -----------------------------------------------------------------------------

# --- Raw CSV Landing Zone (Unity Catalog Volume) ---
# Root of the volume. Every file path below is built from this single value,
# so relocating the raw/sample files only requires changing this line.
VOLUME_BASE_PATH = f"/Volumes/{CATALOG_NAME}/{SCHEMA_NAME}/{VOLUME_NAME}"
# Landing directory for the raw search logs (note the trailing slash: this is
# a directory, not a single CSV file).
RAW_CSV_PATH     = f"{VOLUME_BASE_PATH}/search_logs/"

# --- Sample paths (for GitHub showcase) ---
SAMPLE_INPUT_PATH  = f"{VOLUME_BASE_PATH}/samples/sample_search_logs.csv"
SAMPLE_OUTPUT_PATH = f"{VOLUME_BASE_PATH}/samples/expansion_report.csv"

# --- Delta Tables ---
# Short table names (used in CREATE TABLE statements)
BRONZE_TABLE = "bronze_search_logs"
SILVER_TABLE = "silver_search_logs"
GOLD_TABLE   = "gold_expansion_intelligence"

# Delta tables are registered directly in Unity Catalog (no file path needed)
# Format: catalog.schema.table_name
# Built from the short names above so the two sets of constants cannot drift.
BRONZE_TABLE_PATH = f"{CATALOG_NAME}.{SCHEMA_NAME}.{BRONZE_TABLE}"
SILVER_TABLE_PATH = f"{CATALOG_NAME}.{SCHEMA_NAME}.{SILVER_TABLE}"
GOLD_TABLE_PATH   = f"{CATALOG_NAME}.{SCHEMA_NAME}.{GOLD_TABLE}"

# -----------------------------------------------------------------------------
# 3. PIPELINE SETTINGS
# -----------------------------------------------------------------------------

# Data generation settings (Phase 1)
# Phase 1 - data generation
# Number of trailing days of logs to generate.
RAW_DATA_DAYS: int = 90
# Fraction of generated records that are deliberately dirty (3%).
DIRTY_RECORD_RATE: float = 0.03

# Phase 3 - silver layer
# Session-duration bounds: records at or below the minimum (zero/negative)
# and records above the maximum (longer than one hour, treated as outliers)
# are meant to be dropped.
MIN_SESSION_DURATION: int = 0
MAX_SESSION_DURATION: int = 3600

# Phase 4 - gold layer
# A city needs at least this many searches before it qualifies as a signal.
MIN_SEARCHES_FOR_SIGNAL: int = 100
# How many top expansion-candidate cities to show.
TOP_N_CITIES: int = 10

# -----------------------------------------------------------------------------
# 4. EXPANSION SIGNAL DEFINITION
#    Only these error types count as real expansion demand signals
#    APP_ERROR and PAYMENT_FAILED are technical issues, not demand signals
# -----------------------------------------------------------------------------

# Error types treated as expansion demand signals, strongest first.
EXPANSION_ERROR_TYPES = [
    # Strongest signal: the city is not covered at all.
    "NO_SERVICE_AREA",
    # Secondary signal: the city is covered but there is no supply.
    "NO_DRIVERS_NEARBY",
]

# -----------------------------------------------------------------------------
# 5. APP SETTINGS
# -----------------------------------------------------------------------------

# Name shown in the config summary banner.
APP_NAME: str = "OlaRide-Search-Log-Pipeline"
# Logging verbosity for the pipeline.
LOG_LEVEL: str = "INFO"
# Table write mode: "overwrite" for dev, "append" for production.
WRITE_MODE: str = "overwrite"

# -----------------------------------------------------------------------------
# 6. PRINT CONFIG SUMMARY (runs when this file is imported)
# -----------------------------------------------------------------------------

# Emit the summary banner in a single print call. The settings rows are built
# from a (label, value) table; labels are left-aligned in a 17-character
# column so the colons line up. A generator is used so no helper names are
# left behind in the module namespace.
print(
    "\n".join(
        [
            "=" * 60,
            f"  {APP_NAME}",
            "=" * 60,
            *(
                f"  {label:<17}: {value}"
                for label, value in (
                    ("CATALOG", CATALOG_NAME),
                    ("SCHEMA", SCHEMA_NAME),
                    ("VOLUME", VOLUME_NAME),
                    ("RAW_CSV_PATH", RAW_CSV_PATH),
                    ("BRONZE_TABLE", BRONZE_TABLE_PATH),
                    ("SILVER_TABLE", SILVER_TABLE_PATH),
                    ("GOLD_TABLE", GOLD_TABLE_PATH),
                    ("WRITE_MODE", WRITE_MODE),
                )
            ),
            "=" * 60,
            "  Config loaded successfully!",
            "=" * 60,
        ]
    )
)