# Databricks notebook source
# =============================================================================
# 🚀 PRODUCTION SERVING ENDPOINT - CONFIG DRIVEN (FIXED)
# =============================================================================
# Purpose: Deploy model serving endpoint for production inference
# Now reads from pipeline_config.yml - No hardcoding!
# Prerequisites: Run 06_production_promotion.py first
# =============================================================================

from databricks.sdk import WorkspaceClient
from databricks.sdk.service.serving import (
    EndpointCoreConfigInput,
    ServedEntityInput
)
import mlflow
from mlflow.tracking import MlflowClient
import time
import sys
import os
import yaml
import traceback

print("=" * 80)
print("üöÄ PRODUCTION SERVING ENDPOINT DEPLOYMENT (CONFIG-DRIVEN)")
print("=" * 80)

# =============================================================================
# ✅ LOAD PIPELINE CONFIGURATION
# =============================================================================
# Reads pipeline_config.yml (relative to the current working directory) and
# derives the module-level settings used by the steps below:
#   MODEL_TYPE, UC_CATALOG, UC_SCHEMA, BASE_NAME  - from the "model" section
#   MODEL_NAME, ENDPOINT_NAME                     - derived from the above
#   PRODUCTION_ALIAS                              - from the "aliases" section
#   WORKLOAD_SIZE, SCALE_TO_ZERO                  - optional "serving" section
# Any failure while loading aborts the run with exit code 1.
print("\nüìã Loading pipeline configuration from pipeline_config.yml...")

try:
    # Explicit encoding so parsing does not depend on the platform default.
    with open("pipeline_config.yml", "r", encoding="utf-8") as f:
        pipeline_cfg = yaml.safe_load(f)

    # Model Configuration (required keys; a missing key is reported below)
    MODEL_TYPE = pipeline_cfg["model"]["type"]
    UC_CATALOG = pipeline_cfg["model"]["catalog"]
    UC_SCHEMA = pipeline_cfg["model"]["schema"]
    BASE_NAME = pipeline_cfg["model"]["base_name"]

    # Auto-generate model name
    MODEL_NAME = f"{UC_CATALOG}.{UC_SCHEMA}.{BASE_NAME}_{MODEL_TYPE}_uc2"

    # Auto-generate endpoint name based on model type
    # (underscores in the base name are replaced with hyphens)
    ENDPOINT_NAME = f"{BASE_NAME.replace('_', '-')}-{MODEL_TYPE}-prod"

    # Production alias
    PRODUCTION_ALIAS = pipeline_cfg["aliases"]["production"]

    # Serving configuration (with defaults if not in config).
    # "serving:" may be present but empty, which YAML parses as None; fall
    # back to an empty mapping so the defaults still apply in that case.
    _serving_cfg = pipeline_cfg.get("serving") or {}
    WORKLOAD_SIZE = _serving_cfg.get("workload_size", "Small")
    SCALE_TO_ZERO = _serving_cfg.get("scale_to_zero", True)

    print("‚úÖ Configuration loaded successfully!")
    print("\nüìä Configuration Details:")
    print(f"   Model Type: {MODEL_TYPE.upper()}")
    print(f"   Model Name: {MODEL_NAME}")
    print(f"   Endpoint Name: {ENDPOINT_NAME}")
    print(f"   Production Alias: @{PRODUCTION_ALIAS}")
    print(f"   Workload Size: {WORKLOAD_SIZE}")
    print(f"   Scale to Zero: {SCALE_TO_ZERO}")

except FileNotFoundError:
    print("‚ùå ERROR: pipeline_config.yml not found!")
    print("üí° Please create pipeline_config.yml in the same directory")
    sys.exit(1)
except Exception as e:
    # Covers malformed YAML, missing required keys and an empty config file.
    print(f"‚ùå ERROR loading configuration: {e}")
    traceback.print_exc()
    sys.exit(1)

print("=" * 80)

# =============================================================================
# ✅ CLIENTS INITIALIZATION
# =============================================================================
# Build the two module-level clients used by every step below:
#   w      - Databricks WorkspaceClient (serving endpoint operations)
#   client - MlflowClient (model registry lookups)
# Any failure here is fatal: the error is printed and the script exits with 1.
try:
    w = WorkspaceClient()
    # Registry URI is set to "databricks-uc" before the MLflow client is built.
    mlflow.set_registry_uri("databricks-uc")
    client = MlflowClient()
except Exception as init_error:
    print(f"‚ùå Failed to initialize clients: {init_error}")
    traceback.print_exc()
    sys.exit(1)
else:
    print("\n‚úÖ WorkspaceClient initialized")
    print("‚úÖ MLflow client initialized")

# =============================================================================
# ✅ STEP 1: FETCH LATEST PRODUCTION MODEL VERSION
# =============================================================================
def get_production_version():
    """Resolve the model version that carries the production alias.

    A direct alias lookup on ``MODEL_NAME`` is attempted first. If that call
    raises for any reason, every registered version of the model is fetched
    and inspected; among those whose aliases contain ``PRODUCTION_ALIAS``
    (compared case-insensitively) the numerically highest version wins.

    Returns:
        The ``version`` attribute of the selected model version, or ``None``
        when the model has no versions, no version carries the alias, or the
        fallback search itself raises.
    """
    rule = "=" * 80
    print(f"\n{rule}")
    print("üìã STEP 1: Finding PRODUCTION Model Version")
    print(rule)
    print(f"üîç Model: {MODEL_NAME}")
    print(f"üîç Looking for: @{PRODUCTION_ALIAS}")

    # --- Attempt 1: ask the registry for the alias directly -----------------
    try:
        print("\nüîÑ Trying direct alias lookup...")
        aliased = client.get_model_version_by_alias(MODEL_NAME, PRODUCTION_ALIAS)

        print("‚úÖ Production model found!")
        print(f"   Version: v{aliased.version}")
        print(f"   Run ID: {aliased.run_id}")
        print(f"   Status: {aliased.status}")

        return aliased.version

    except Exception as lookup_error:
        # Not fatal: fall through to the exhaustive search below.
        print(f"‚ö†Ô∏è Direct lookup failed: {lookup_error}")

    # --- Attempt 2: scan every version and check its aliases ----------------
    try:
        print("\nüîÑ Searching all versions...")
        all_versions = client.search_model_versions(f"name='{MODEL_NAME}'")

        if not all_versions:
            print(f"‚ùå No versions found for: {MODEL_NAME}")
            return None

        print(f"üìä Found {len(all_versions)} total version(s)")

        tagged = []
        for candidate in all_versions:
            # Each version is re-fetched individually and its aliases are
            # read from the fetched record rather than from the search hit.
            detail = client.get_model_version(MODEL_NAME, candidate.version)

            for alias in (detail.aliases or []):
                if alias.lower() == PRODUCTION_ALIAS.lower():
                    tagged.append(detail)
                    print(f"   ‚úì Version v{candidate.version} has @{PRODUCTION_ALIAS} alias")
                    break

        if not tagged:
            print(f"\n‚ùå No version with @{PRODUCTION_ALIAS} alias found")
            print("üí° Run 06_production_promotion.py first!")
            return None

        # Version identifiers are compared numerically, not lexically.
        newest = max(tagged, key=lambda record: int(record.version))
        print(f"\n‚úÖ Selected production version: v{newest.version}")

        return newest.version

    except Exception as search_error:
        print(f"‚ùå Version search failed: {search_error}")
        traceback.print_exc()
        return None

# =============================================================================
# ✅ STEP 2: CHECK IF ENDPOINT EXISTS
# =============================================================================
def endpoint_exists(endpoint_name):
    """Report whether a serving endpoint named ``endpoint_name`` is listed.

    The workspace's serving endpoints are iterated until one with a matching
    ``name`` is found.

    Args:
        endpoint_name: Exact endpoint name to look for.

    Returns:
        ``True`` if a matching endpoint is found; ``False`` if none matches
        or if listing the endpoints raises (the error is printed, not raised).
    """
    rule = "=" * 80
    print(f"\n{rule}")
    print("üìã STEP 2: Checking Endpoint Status")
    print(rule)
    print(f"üîç Endpoint: {endpoint_name}")

    try:
        # Stop at the first endpoint whose name matches; None if there is none.
        found = next(
            (candidate for candidate in w.serving_endpoints.list()
             if candidate.name == endpoint_name),
            None,
        )

        if found is not None:
            print("‚úÖ Endpoint exists")
            print(f"   State: {found.state}")
            return True

        print("‚ÑπÔ∏è Endpoint does NOT exist (will be created)")
        return False

    except Exception as check_error:
        # Best-effort: a failed check is reported and treated as "not found".
        print(f"‚ö†Ô∏è Error checking endpoint: {check_error}")
        return False

# =============================================================================
# ✅ STEP 3: DEPLOY ENDPOINT (CREATE/UPDATE)
# =============================================================================
def deploy_endpoint(endpoint_name, model_name, version, exists):
    """Create the serving endpoint, or update its config if it already exists.

    A single served entity is built from ``model_name``/``version`` together
    with the module-level ``WORKLOAD_SIZE`` and ``SCALE_TO_ZERO`` settings.

    Args:
        endpoint_name: Name of the serving endpoint to create or update.
        model_name: Registered model name to serve.
        version: Model version to serve.
        exists: When truthy the endpoint config is updated; otherwise a new
            endpoint is created.

    Returns:
        ``True`` once the create/update call has been issued without raising,
        ``False`` if it raised (the error and traceback are printed).
    """
    rule = "=" * 80
    print(f"\n{rule}")
    print("üìã STEP 3: Deploying Serving Endpoint")
    print(rule)

    entity = ServedEntityInput(
        entity_name=model_name,
        entity_version=version,
        workload_size=WORKLOAD_SIZE,
        scale_to_zero_enabled=SCALE_TO_ZERO,
    )

    # Echo what is about to be deployed, one "label: value" line per setting.
    print("\nüì¶ Deployment Configuration:")
    settings = (
        ("Endpoint Name", endpoint_name),
        ("Model Name", model_name),
        ("Model Type", MODEL_TYPE.upper()),
        ("Model Version", version),
        ("Workload Size", WORKLOAD_SIZE),
        ("Scale-to-zero", SCALE_TO_ZERO),
    )
    for label, value in settings:
        print(f"   {label}: {value}")

    try:
        if not exists:
            print("\nüÜï Creating new endpoint...")
            w.serving_endpoints.create(
                name=endpoint_name,
                config=EndpointCoreConfigInput(
                    name=endpoint_name,
                    served_entities=[entity],
                ),
            )
            print("‚úÖ Creation triggered")
        else:
            print("\nüîÑ Updating existing endpoint...")
            w.serving_endpoints.update_config(
                name=endpoint_name,
                served_entities=[entity],
            )
            print("‚úÖ Update triggered")

    except Exception as deploy_error:
        print(f"‚ùå Deployment Failed: {deploy_error}")
        traceback.print_exc()
        return False

    return True

# =============================================================================
# ✅ STEP 4: WAIT UNTIL ENDPOINT IS READY
# =============================================================================
def _endpoint_state_name(value):
    """Reduce an endpoint state value to its bare upper-case token.

    Accepts enum-like objects exposing ``.value`` as well as plain strings
    such as ``"EndpointStateReady.NOT_READY"``; ``None`` becomes ``""``.
    """
    if value is None:
        return ""
    token = str(getattr(value, "value", value))
    return token.rsplit(".", 1)[-1].upper()


def wait_until_ready(endpoint_name, timeout=1500, poll_interval=15):
    """Poll the serving endpoint until it is ready, fails, or times out.

    Args:
        endpoint_name: Name of the serving endpoint to poll.
        timeout: Maximum total wait in seconds, counted as the sum of the
            poll intervals slept (default 1500, i.e. 25 minutes).
        poll_interval: Seconds to sleep between polls (default 15).

    Returns:
        ``True`` when the endpoint reports config-update ``NOT_UPDATING`` and
        ready ``READY``; ``False`` when the config update failed or was
        canceled, or when ``timeout`` elapses first.

    Raises:
        ValueError: If ``poll_interval`` is not positive (the loop could
            otherwise never reach the timeout).
    """
    if poll_interval <= 0:
        raise ValueError("poll_interval must be a positive number of seconds")

    print(f"\n{'='*80}")
    print("üìã STEP 4: Waiting for Endpoint Ready Status")
    print(f"{'='*80}")
    print("‚è≥ This may take several minutes...")

    waited = 0

    while waited < timeout:
        try:
            ep = w.serving_endpoints.get(name=endpoint_name)
            state = ep.state

            # Human-readable forms, used only in the progress/error messages.
            update_state = str(state.config_update)
            ready_state = str(state.ready)

            # Exact tokens for the decisions. A substring test is wrong here:
            # "READY" is also contained in "NOT_READY".
            update_name = _endpoint_state_name(state.config_update)
            ready_name = _endpoint_state_name(state.ready)

            if update_name == "NOT_UPDATING" and ready_name == "READY":
                print(f"\n{'='*80}")
                print("‚úÖ ENDPOINT READY FOR PRODUCTION TRAFFIC!")
                print(f"{'='*80}")
                return True

            # Terminal, unsuccessful outcomes: stop polling immediately
            # instead of waiting for the timeout.
            if "FAILED" in update_name or update_name == "UPDATE_CANCELED":
                print(f"\n‚ùå Endpoint deployment FAILED: {update_state}")
                return False

            print(f"‚è≥ {waited}s | Update: {update_state} | Ready: {ready_state}")

        except Exception as e:
            # Best-effort polling: a failed status check is reported and
            # retried on the next tick rather than aborting the wait.
            print(f"‚ö†Ô∏è Error checking status: {e}")

        time.sleep(poll_interval)
        waited += poll_interval

    print(f"\n‚ùå Timeout ‚Äî endpoint not ready after {timeout}s")
    return False

# =============================================================================
# ✅ MAIN EXECUTION
# =============================================================================
def main():
    """Run the serving endpoint deployment pipeline end to end.

    Steps: resolve the production model version, check whether the endpoint
    exists, create or update it, then wait for it to become ready. On success
    a summary is printed and, when ``dbutils`` is usable, the endpoint name
    and version are published as job task values and the notebook exit value
    is set to ``"ENDPOINT_READY"``.

    Exits the process with status 1 (via ``sys.exit``) if any step reports
    failure or an unexpected exception is raised.
    """
    try:
        # Step 1: Get production version
        version = get_production_version()
        if not version:
            print("\n‚ùå No production model found")
            print("üí° Run 06_production_promotion.py first")
            # SystemExit is not an Exception subclass, so it is not caught by
            # the outer handler below and terminates the script directly.
            sys.exit(1)

        # Step 2: Check if endpoint exists
        exists = endpoint_exists(ENDPOINT_NAME)

        # Step 3: Deploy endpoint
        if not deploy_endpoint(ENDPOINT_NAME, MODEL_NAME, version, exists):
            sys.exit(1)

        # Step 4: Wait for ready status
        if not wait_until_ready(ENDPOINT_NAME):
            sys.exit(1)

        # Success summary
        print(f"\n{'='*80}")
        print("‚úÖ‚úÖ SERVING ENDPOINT DEPLOYMENT COMPLETE ‚úÖ‚úÖ")
        print(f"{'='*80}")
        print("\nüìä Deployment Summary:")
        print(f"   Endpoint Name: {ENDPOINT_NAME}")
        print(f"   Model: {MODEL_NAME}")
        print(f"   Model Type: {MODEL_TYPE.upper()}")
        print(f"   Version: v{version}")
        print("   Status: READY")
        print("\nüìå Next Step:")
        print("   Run 08_production_inference.py for batch inference")
        print(f"{'='*80}")

        # Save for workflow (best-effort). `dbutils` is not defined in this
        # file, so a NameError is expected wherever it is not injected.
        # Only Exception is caught so that SystemExit / KeyboardInterrupt are
        # no longer swallowed as they were by a bare `except:`.
        try:
            dbutils.jobs.taskValues.set(key="endpoint_name", value=ENDPOINT_NAME)
            dbutils.jobs.taskValues.set(key="endpoint_version", value=version)
            print("\n‚úÖ Task values saved for workflow")
        except Exception:
            print("\n‚ÑπÔ∏è Not running in workflow - skipping task values")

        # Exit with success status (best-effort, same reasoning as above).
        # NOTE(review): if dbutils.notebook.exit signals via an Exception
        # subclass it is still suppressed here, as before - confirm intent.
        try:
            dbutils.notebook.exit("ENDPOINT_READY")
        except Exception:
            pass

    except Exception as e:
        print(f"\n{'='*80}")
        print("‚ùå SERVING ENDPOINT DEPLOYMENT FAILED")
        print(f"{'='*80}")
        print(f"Error: {str(e)}")
        print(f"{'='*80}")
        traceback.print_exc()
        sys.exit(1)

# =============================================================================
# ✅ EXECUTE
# =============================================================================
# Run the deployment pipeline only when this file is executed as the main
# module; importing it does not call main().
if __name__ == "__main__":
    main()