In [None]:
# Databricks notebook source
# =============================================================================
# 🚀 PRODUCTION SERVING ENDPOINT — SINGLE MODEL ARCHITECTURE
# =============================================================================

from databricks.sdk import WorkspaceClient
from databricks.sdk.service.serving import (
    EndpointCoreConfigInput,
    ServedEntityInput
)
import mlflow
from mlflow.tracking import MlflowClient
import time, sys, os

# Announce the start of the deployment run: title framed by two 80-column rules.
print(
    "=" * 80,
    "üöÄ STARTING PRODUCTION SERVING ENDPOINT DEPLOYMENT",
    "=" * 80,
    sep="\n",
)


# =============================================================================
# ✅ FIXED CONFIG (MUST MATCH TRAINING + REGISTER + PROMOTION SCRIPTS)
# =============================================================================
# Unity Catalog coordinates; the registered model name is catalog.schema.model.
UC_CATALOG, UC_SCHEMA = "workspace", "ml"
MODEL_NAME = ".".join((UC_CATALOG, UC_SCHEMA, "house_price_xgboost_uc2"))

# Name of the serving endpoint that is created or updated by this script.
ENDPOINT_NAME = "house-price-xgboost-prod"

# Registry alias used to pick the model version to deploy.
PRODUCTION_ALIAS = "production"

# Sizing options handed to the served entity.
WORKLOAD_SIZE = "Small"
SCALE_TO_ZERO = True


# =============================================================================
# ✅ CLIENTS INITIALIZATION
# =============================================================================
# Create the workspace client and an MLflow client that targets the
# "databricks-uc" registry. Any failure here is fatal: report it and exit 1.
try:
    w = WorkspaceClient()

    # The registry URI is set before the MLflow client is constructed.
    mlflow.set_registry_uri("databricks-uc")
    client = MlflowClient()

    print(
        "‚úÖ WorkspaceClient initialized",
        "‚úÖ MLflow client initialized\n",
        sep="\n",
    )

except Exception as init_error:
    print("‚ùå Failed to initialize clients:", init_error)
    sys.exit(1)


# =============================================================================
# ✅ STEP 1: Fetch latest PRODUCTION model version
# =============================================================================
def get_production_version():
    """Return the model version that carries the production alias.

    Resolves ``PRODUCTION_ALIAS`` on ``MODEL_NAME`` with one registry lookup
    rather than listing every version and re-fetching each of them to inspect
    its aliases.

    Returns:
        The ``version`` attribute of the aliased model version, or ``None``
        when the registry lookup fails (for example because the alias has not
        been assigned yet). A diagnostic is printed in the failure case.
    """
    # Imported locally so the block does not depend on a new file-level import.
    from mlflow.exceptions import MlflowException

    print("üîç Fetching latest PRODUCTION version...")

    try:
        mv = client.get_model_version_by_alias(MODEL_NAME, PRODUCTION_ALIAS)
    except MlflowException as err:
        print("‚ùå No production model version found. Run promotion job first.")
        # Surface the underlying error so permission or connectivity problems
        # are not mistaken for a missing alias.
        print(f"   Registry lookup error: {err}")
        return None

    print(f"‚úÖ Production Model Version: {mv.version}")
    print(f"   Run ID: {mv.run_id}\n")
    return mv.version


# =============================================================================
# ✅ STEP 2: Check if endpoint exists
# =============================================================================
def endpoint_exists(endpoint_name):
    """Report whether a serving endpoint with the given name is listed.

    Args:
        endpoint_name: Name to look for among the listed serving endpoints.

    Returns:
        True if a listed endpoint has exactly this name. False if none does,
        or if listing raised (the error is printed rather than propagated).
    """
    print(f"üîç Checking if endpoint '{endpoint_name}' exists...")

    try:
        # any() stops at the first match, just like an early-return loop.
        found = any(
            candidate.name == endpoint_name
            for candidate in w.serving_endpoints.list()
        )

        if not found:
            print("‚ÑπÔ∏è Endpoint does NOT exist\n")
            return False

        print("‚úÖ Endpoint exists\n")
        return True

    except Exception as lookup_error:
        print("‚ö†Ô∏è Error checking endpoint:", lookup_error)
        return False


# =============================================================================
# ✅ STEP 3: Deploy (create/update)
# =============================================================================
def deploy_endpoint(endpoint_name, model_name, version, exists):
    """Create the serving endpoint, or update its config when it exists.

    Args:
        endpoint_name: Name of the serving endpoint.
        model_name: Registered model name used as the served entity.
        version: Model version to serve.
        exists: Truthy to update the existing endpoint's config, falsy to
            create a new endpoint.

    Returns:
        True once the create/update call has returned without raising;
        False if it raised (the error is printed).
    """
    entity = ServedEntityInput(
        entity_name=model_name,
        entity_version=version,
        workload_size=WORKLOAD_SIZE,
        scale_to_zero_enabled=SCALE_TO_ZERO,
    )
    entities = [entity]

    # Summary of what is about to be deployed.
    rule = "=" * 80
    print(rule)
    print("üì¶ DEPLOYMENT CONFIGURATION")
    print(rule)
    print(f"Endpoint Name: {endpoint_name}")
    print(f"Model Name:    {model_name}")
    print(f"Model Version: {version}")
    print(f"Workload Size: {WORKLOAD_SIZE}")
    print(f"Scale-to-zero: {SCALE_TO_ZERO}\n")

    try:
        if not exists:
            print("üÜï Creating new endpoint...")
            new_config = EndpointCoreConfigInput(
                name=endpoint_name,
                served_entities=entities,
            )
            w.serving_endpoints.create(name=endpoint_name, config=new_config)
            print("‚úÖ Creation triggered\n")
            return True

        print("üîÑ Updating existing endpoint...")
        w.serving_endpoints.update_config(
            name=endpoint_name,
            served_entities=entities,
        )
        print("‚úÖ Update triggered\n")
        return True

    except Exception as deploy_error:
        print("‚ùå Deployment Failed:", deploy_error)
        return False


# =============================================================================
# ✅ STEP 4: Wait until endpoint is ready
# =============================================================================
def wait_until_ready(endpoint_name, timeout=1500, step=15):
    """Poll the serving endpoint until it is ready, has failed, or times out.

    Args:
        endpoint_name: Name of the serving endpoint to poll.
        timeout: Total seconds of sleeping allowed before giving up. Time
            spent inside the status call itself is not counted.
        step: Seconds to sleep between polls; must be positive.

    Returns:
        True when the endpoint reports config update ``NOT_UPDATING`` and
        ready ``READY``. False when the update state contains ``FAILED`` or
        the timeout elapses first.

    Raises:
        ValueError: If ``step`` is not positive (the loop could never end).
    """
    if step <= 0:
        raise ValueError("step must be a positive number of seconds")

    print("‚è≥ Waiting for endpoint to become READY...\n")

    waited = 0

    while waited < timeout:

        ep = w.serving_endpoints.get(name=endpoint_name)
        state = ep.state

        # Tolerate a response without a state object: treat it as "not ready
        # yet" and keep polling instead of raising AttributeError.
        update_state = str(getattr(state, "config_update", None))
        ready_state = str(getattr(state, "ready", None))

        # The stringified state may be qualified (e.g. "Class.MEMBER"), and
        # "NOT_READY" contains "READY" as a substring. Compare the final
        # dotted component exactly so NOT_READY is never taken for READY.
        update_name = update_state.rsplit(".", 1)[-1]
        ready_name = ready_state.rsplit(".", 1)[-1]

        if update_name == "NOT_UPDATING" and ready_name == "READY":
            print("\n‚úÖ Endpoint READY for production traffic!\n")
            return True

        if "FAILED" in update_name:
            print("‚ùå Endpoint FAILED:", update_state)
            return False

        print(f"‚è≥ {waited}s | Update: {update_state} | Ready: {ready_state}")
        time.sleep(step)
        waited += step

    print("‚ùå Timeout ‚Äî endpoint not ready")
    return False


# =============================================================================
# ✅ MAIN
# =============================================================================
if __name__ == "__main__":

    # Resolve the version to deploy; stop when no production version is found.
    version = get_production_version()
    if not version:
        sys.exit(1)

    # Decides between updating an existing endpoint and creating a new one.
    exists = endpoint_exists(ENDPOINT_NAME)

    if not deploy_endpoint(ENDPOINT_NAME, MODEL_NAME, version, exists):
        sys.exit(1)

    if not wait_until_ready(ENDPOINT_NAME):
        sys.exit(1)

    print("‚úÖ‚úÖ PRODUCTION ENDPOINT DEPLOYMENT COMPLETED SUCCESSFULLY ‚úÖ‚úÖ")

    # `dbutils` is neither defined nor imported in this file; it is expected
    # to be supplied by the notebook runtime. Only the "name is not defined"
    # case is ignored, so anything the exit call itself raises (including a
    # control-flow exception used to end the notebook) is no longer swallowed.
    try:
        dbutils.notebook.exit("ENDPOINT_READY")
    except NameError:
        pass
