In [None]:
# Databricks notebook source
# =============================================================================
# 🚀 PRODUCTION PROMOTION - CONFIG DRIVEN (FIXED)
# =============================================================================
# Purpose: Promote UAT-passed models to Production
# Now reads from pipeline_config.yml - No hardcoding!
# Prerequisites: Run 05_uat_inference.py first (UAT must pass)
# =============================================================================

import mlflow
from mlflow.tracking import MlflowClient
import os
import time
import sys
import requests
import yaml
import traceback
from pyspark.sql import SparkSession
from IPython import get_ipython

print("=" * 80)
print("üöÄ PRODUCTION PROMOTION (CONFIG-DRIVEN)")
print("=" * 80)

# =============================================================================
# ✅ LOAD PIPELINE CONFIGURATION (Dynamic Path)
# =============================================================================
print("\nüìã Loading pipeline configuration from pipeline_config.yml...")

try:
    # Anchor on this script's directory; when __file__ is not defined,
    # use the current working directory instead.
    if "__file__" in globals():
        current_dir = os.path.dirname(os.path.abspath(__file__))
    else:
        current_dir = os.getcwd()
    project_root = os.path.abspath(os.path.join(current_dir, os.pardir))

    # Prefer a config file next to this script; otherwise use the dev_env copy.
    _local_cfg = os.path.join(current_dir, "pipeline_config.yml")
    _dev_cfg = os.path.join(project_root, "dev_env", "pipeline_config.yml")
    config_path = _local_cfg if os.path.exists(_local_cfg) else _dev_cfg

    # Parse the YAML document
    with open(config_path) as cfg_file:
        pipeline_cfg = yaml.safe_load(cfg_file)

    print(f"‚úÖ Loaded pipeline_config.yml from: {config_path}")

    # Model settings
    _model_cfg = pipeline_cfg["model"]
    MODEL_TYPE = _model_cfg["type"]
    UC_CATALOG = _model_cfg["catalog"]
    UC_SCHEMA = _model_cfg["schema"]
    BASE_NAME = _model_cfg["base_name"]

    # Fully qualified registry name: <catalog>.<schema>.<base>_<type>_uc2
    MODEL_NAME = ".".join([UC_CATALOG, UC_SCHEMA, f"{BASE_NAME}_{MODEL_TYPE}_uc2"])

    # Registry aliases
    _alias_cfg = pipeline_cfg["aliases"]
    PRODUCTION_ALIAS = _alias_cfg["production"]
    STAGING_ALIAS = _alias_cfg["staging"]

    # Table holding UAT results
    UAT_RESULTS_TABLE = pipeline_cfg["tables"]["uat_results"]

    print("‚úÖ Configuration loaded successfully!")
    print("\nüìä Configuration Details:")
    print(f"   Model Type: {MODEL_TYPE.upper()}")
    print(f"   Model Name: {MODEL_NAME}")
    print(f"   Staging Alias: @{STAGING_ALIAS}")
    print(f"   Production Alias: @{PRODUCTION_ALIAS}")
    print(f"   UAT Results Table: {UAT_RESULTS_TABLE}")

except FileNotFoundError:
    # Neither candidate location contained the config file.
    print("‚ùå ERROR: pipeline_config.yml not found!")
    print("üí° Please create pipeline_config.yml in the same directory or in dev_env/")
    sys.exit(1)
except Exception as config_error:
    # Anything else (bad YAML, missing key, ...) is fatal for this script.
    print(f"‚ùå ERROR loading configuration: {config_error}")
    traceback.print_exc()
    sys.exit(1)

print("=" * 80)

# =============================================================================
# ✅ SAFE EXIT HELPER
# =============================================================================
def exit_notebook_friendly(code=0):
    """Terminate the script, or merely report the exit code inside IPython.

    When ``get_ipython()`` returns a shell, no ``SystemExit`` is raised: a
    warning is printed for a non-zero ``code`` and the function returns, so the
    caller keeps running. Outside IPython this calls ``sys.exit(code)``.
    """
    if get_ipython() is None:
        sys.exit(code)

    # Interactive shell: do not raise SystemExit, just surface failures.
    if code != 0:
        print(f"‚ö†Ô∏è Notebook exit with code {code} (non-zero)")

# =============================================================================
# ✅ SLACK NOTIFICATION SETUP
# =============================================================================
def get_slack_webhook():
    """Return the first non-blank Slack webhook URL found in the secret scopes.

    Scopes are tried in order ("shared-scope", then "dev-scope"). Any error
    raised while reading a scope is printed and the next scope is tried.
    Returns None when no scope yields a usable value.
    """
    candidate_scopes = ("shared-scope", "dev-scope")
    for scope_name in candidate_scopes:
        try:
            url = dbutils.secrets.get(scope_name, "SLACK_WEBHOOK_URL")
            usable = bool(url and url.strip())
        except Exception as lookup_error:
            print(f"‚ö†Ô∏è Slack webhook not found in scope '{scope_name}': {lookup_error}")
            continue
        if usable:
            print(f"‚úì Slack webhook configured from scope '{scope_name}'")
            return url

    print("‚ö†Ô∏è No Slack webhook configured")
    return None


# Resolved once at import time; None disables Slack delivery.
SLACK_WEBHOOK_URL = get_slack_webhook()

def send_slack_notification(message, level="info"):
    """Post ``message`` to Slack, prefixed with an icon chosen by ``level``.

    If no webhook is configured the message is printed instead. Delivery
    problems (non-200 status or any exception) are printed, never raised.
    """
    if not SLACK_WEBHOOK_URL:
        print("‚ö†Ô∏è Slack webhook not configured")
        print(f"üì¢ Message: {message}")
        return

    level_icons = {
        "info": "‚ÑπÔ∏è",
        "success": "‚úÖ",
        "warning": "‚ö†Ô∏è",
        "error": "‚ùå",
    }
    # Unknown levels use the "info" icon.
    icon = level_icons.get(level, level_icons["info"])
    payload = {"text": f"{icon} {message}"}

    try:
        reply = requests.post(SLACK_WEBHOOK_URL, json=payload, timeout=5)
        if reply.status_code != 200:
            print(f"‚ö†Ô∏è Slack notification failed: {reply.status_code}")
        else:
            print("üì¢ Slack notification sent successfully")
    except Exception as post_error:
        print(f"‚ö†Ô∏è Slack notification error: {post_error}")

# =============================================================================
# ✅ INITIALIZATION
# =============================================================================
# Create the MLflow client and Spark session used by the steps below.
# Any failure is reported to Slack and then re-raised.
try:
    if os.environ.get("DATABRICKS_RUNTIME_VERSION") is not None:
        # The environment variable is present, so use the "databricks-uc" registry.
        mlflow.set_registry_uri("databricks-uc")
        print("\n‚úÖ MLflow connected to Unity Catalog")
    client = MlflowClient()
    _spark_builder = SparkSession.builder.appName("Production_Promotion")
    spark = _spark_builder.getOrCreate()
    print("‚úÖ Spark initialized")
except Exception as init_error:
    print(f"‚ùå Initialization failed: {init_error}")
    _init_failure_msg = (
        f"‚ùå Production promotion failed for `{MODEL_NAME}`: Initialization error"
    )
    send_slack_notification(_init_failure_msg, "error")
    raise init_error

# =============================================================================
# ✅ HELPER: WAIT UNTIL MODEL IS READY
# =============================================================================
def wait_until_ready(client, model_name, version, timeout=300, poll_interval=5):
    """Poll a model version until its status is READY.

    Args:
        client: Object exposing ``get_model_version(name, version)`` whose
            result has a ``status`` attribute.
        model_name: Registered model name passed to ``get_model_version``.
        version: Model version passed to ``get_model_version``.
        timeout: Maximum number of seconds to keep polling.
        poll_interval: Seconds to wait between polls (default 5).

    Returns:
        True once the status is "READY"; False if the status becomes
        "FAILED_REGISTRATION" or the timeout elapses first.
    """
    # A monotonic clock keeps the deadline immune to wall-clock adjustments.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        status = client.get_model_version(model_name, version).status
        if status == "READY":
            print(f"‚úÖ Model v{version} is READY")
            return True
        if status == "FAILED_REGISTRATION":
            print(f"‚ùå Model v{version} registration failed")
            return False
        print(f"‚è≥ Model v{version} status: {status}")
        # Never sleep past the deadline, and never pass a negative duration.
        remaining = deadline - time.monotonic()
        time.sleep(max(0.0, min(poll_interval, remaining)))
    print(f"‚è∞ Timeout: Model v{version} not ready")
    return False

# =============================================================================
# ✅ STEP 1: GET STAGING MODEL
# =============================================================================
def get_staging_version(client):
    """Resolve the model version that carries the staging alias.

    The alias is first looked up directly. If that raises, every version of
    the model is scanned and the highest-numbered one whose aliases include the
    staging alias (case-insensitive) is chosen. Returns the model version
    object, or None when nothing matches or the scan itself fails.
    """
    rule = "=" * 80
    print(f"\n{rule}")
    print("üìã STEP 1: Finding STAGING Model")
    print(rule)
    print(f"üîç Model: {MODEL_NAME}")
    print(f"üîç Looking for: @{STAGING_ALIAS}")

    # First attempt: ask the registry for the alias directly.
    try:
        print("\nüîÑ Trying direct alias lookup...")
        aliased = client.get_model_version_by_alias(MODEL_NAME, STAGING_ALIAS)
        print("‚úÖ Staging model found!")
        print(f"   Version: v{aliased.version}")
        print(f"   Run ID: {aliased.run_id}")
        print(f"   Status: {aliased.status}")
        return aliased
    except Exception as lookup_error:
        print(f"‚ö†Ô∏è Direct lookup failed: {lookup_error}")

    # Fallback: enumerate all versions and inspect their aliases.
    try:
        print("\nüîÑ Searching all versions...")
        found = client.search_model_versions(f"name='{MODEL_NAME}'")
        if not found:
            print(f"‚ùå No versions found for: {MODEL_NAME}")
            return None

        print(f"üìä Found {len(found)} total version(s)")
        print("\nüìã Available versions:")

        candidates = []
        for summary in found:
            # Re-fetch each version and read its aliases from the result.
            detail = client.get_model_version(MODEL_NAME, summary.version)
            tags = detail.aliases or []
            print(f"   v{summary.version}: Aliases={tags}, Status={detail.status}")

            if any(STAGING_ALIAS.lower() == tag.lower() for tag in tags):
                candidates.append(detail)

        if not candidates:
            print(f"\n‚ùå No version with @{STAGING_ALIAS} alias found")
            return None

        def _version_number(model_version):
            return int(model_version.version)

        newest = max(candidates, key=_version_number)
        print(f"\n‚úÖ Selected staging version: v{newest.version}")
        return newest

    except Exception as search_error:
        print(f"‚ùå Version search failed: {search_error}")
        traceback.print_exc()
        return None

# =============================================================================
# ✅ STEP 2: CHECK UAT STATUS
# =============================================================================
def check_uat_status(staging_version):
    """Report whether the latest UAT result for ``staging_version`` is PASSED.

    Reads the UAT results table into pandas, keeps the rows whose
    ``model_version`` equals ``int(staging_version)`` and inspects the row with
    the greatest ``timestamp``.

    Returns a ``(passed, metrics)`` tuple. ``metrics`` is a dict with the keys
    mae/rmse/r2/mape, or None when there is no matching row or an error occurs.
    """
    rule = "=" * 80
    print(f"\n{rule}")
    print("üìã STEP 2: Checking UAT Status")
    print(rule)

    try:
        print(f"üîç Reading UAT results from: {UAT_RESULTS_TABLE}")
        all_results = spark.table(UAT_RESULTS_TABLE).toPandas()

        if all_results.empty:
            print("‚ö†Ô∏è No UAT results found")
            return False, None

        print(f"üìä Found {len(all_results)} UAT result(s)")

        # Keep only the rows recorded for the requested version.
        is_requested_version = all_results['model_version'] == int(staging_version)
        rows_for_version = all_results[is_requested_version]

        if rows_for_version.empty:
            print(f"\n‚ö†Ô∏è No UAT results for version v{staging_version}")
            print("üí° Run 05_uat_inference.py first!")
            return False, None

        # Most recent row first.
        newest_first = rows_for_version.sort_values('timestamp', ascending=False)
        latest = newest_first.iloc[0]
        outcome = latest['uat_status']

        print(f"\nüìä UAT Results for v{staging_version}:")
        print(f"   Timestamp: {latest['timestamp']}")
        print(f"   UAT Status: {outcome}")
        print(f"   Model Type: {latest.get('model_type', 'N/A')}")
        print(f"   MAE:  {latest['mae']:,.2f}")
        print(f"   RMSE: {latest['rmse']:,.2f}")
        print(f"   R¬≤:   {latest['r2']:.4f}")
        print(f"   MAPE: {latest['mape']:.2f}%")

        metrics = {name: float(latest[name]) for name in ('mae', 'rmse', 'r2', 'mape')}

        if outcome != "PASSED":
            print(f"\n‚ùå Model v{staging_version} FAILED UAT")
            return False, metrics

        print(f"\n‚úÖ Model v{staging_version} PASSED UAT")
        return True, metrics

    except Exception as uat_error:
        print(f"‚ùå Failed to check UAT status: {uat_error}")
        traceback.print_exc()
        return False, None

# =============================================================================
# ✅ STEP 3: PROMOTE TO PRODUCTION
# =============================================================================
def promote_to_production(client, staging_mv, uat_metrics):
    """Point the production alias at the staging model version.

    Args:
        client: MLflow client used to set the alias.
        staging_mv: Model version object; ``version`` and ``run_id`` are read.
        uat_metrics: Optional dict with 'rmse', 'mape' and 'r2' used only for
            reporting; may be None.

    Returns:
        True when the production alias was set, False when the version never
        became READY or setting the alias raised. Problems that occur while
        printing or notifying *after* the alias was set do not change the
        result: the promotion already happened, so True is still returned.
    """
    print(f"\n{'='*80}")
    print(f"üìã STEP 3: Promoting to PRODUCTION")
    print(f"{'='*80}")

    version = staging_mv.version

    print(f"\n‚è≥ Ensuring model v{version} is READY...")
    if not wait_until_ready(client, MODEL_NAME, version):
        msg = f"‚ùå Model v{version} not ready for promotion"
        print(msg)
        send_slack_notification(msg, "error")
        return False

    # Only the alias change decides success or failure.
    try:
        print(f"\nüöÄ Setting @{PRODUCTION_ALIAS} alias to v{version}...")
        client.set_registered_model_alias(
            name=MODEL_NAME,
            alias=PRODUCTION_ALIAS,
            version=version
        )
    except Exception as e:
        msg = f"‚ùå Failed to promote: {e}"
        print(msg)
        send_slack_notification(msg, "error")
        traceback.print_exc()
        return False

    # Reporting is best-effort: the alias has already moved, so a formatting
    # error here (e.g. a missing metric key) must not be reported as a failed
    # promotion.
    try:
        run_id = staging_mv.run_id
        # run_id may be None; slicing None would raise.
        run_id_short = f"{run_id[:8]}..." if run_id else "N/A"

        print(f"\n{'='*80}")
        print(f"‚úÖ‚úÖ PROMOTION SUCCESSFUL ‚úÖ‚úÖ")
        print(f"{'='*80}")
        print(f"   Model: {MODEL_NAME}")
        print(f"   Model Type: {MODEL_TYPE.upper()}")
        print(f"   Version: v{version}")
        print(f"   Promoted: @{STAGING_ALIAS} ‚Üí @{PRODUCTION_ALIAS}")
        print(f"   Run ID: {run_id}")

        if uat_metrics:
            print(f"\nüìä UAT Metrics:")
            print(f"   RMSE: {uat_metrics['rmse']:,.2f}")
            print(f"   MAPE: {uat_metrics['mape']:.2f}%")
            print(f"   R¬≤:   {uat_metrics['r2']:.4f}")

        print(f"{'='*80}")

        # Build the Slack message
        metrics_text = ""
        if uat_metrics:
            metrics_text = (
                f"\nüìä Performance Metrics:\n"
                f"   ‚Ä¢ RMSE: {uat_metrics['rmse']:,.2f}\n"
                f"   ‚Ä¢ MAPE: {uat_metrics['mape']:.2f}%\n"
                f"   ‚Ä¢ R¬≤: {uat_metrics['r2']:.4f}"
            )

        send_slack_notification(
            f"üéâ *PRODUCTION DEPLOYMENT SUCCESS!*\n\n"
            f"Model: `{MODEL_NAME}`\n"
            f"Type: *{MODEL_TYPE.upper()}*\n"
            f"Version: *v{version}*\n"
            f"Status: @{STAGING_ALIAS} ‚Üí @{PRODUCTION_ALIAS}\n"
            f"Run ID: {run_id_short}"
            f"{metrics_text}\n\n"
            f"üöÄ Model is now LIVE in production!",
            "success"
        )
    except Exception as report_error:
        print(f"‚ö†Ô∏è Promotion succeeded but reporting failed: {report_error}")
        traceback.print_exc()
        # send_slack_notification swallows its own errors, so reaching this
        # point means the detailed message was never sent; send a minimal one.
        send_slack_notification(
            f"`{MODEL_NAME}` v{version} promoted to @{PRODUCTION_ALIAS} "
            f"(detailed report unavailable)",
            "success"
        )

    return True

# =============================================================================
# ✅ MAIN EXECUTION
# =============================================================================
def main():
    """Run the production promotion pipeline.

    Steps: find the staging model version, verify its UAT result, then set the
    production alias. Each early-exit path returns explicitly, because
    ``exit_notebook_friendly`` returns (instead of raising ``SystemExit``) when
    running under IPython; without the explicit ``return`` a model that failed
    UAT would still be promoted there.

    Unexpected exceptions are printed, sent to Slack and turned into a non-zero
    exit request; they are not re-raised.
    """
    try:
        # Step 1: Get staging model
        staging_mv = get_staging_version(client)
        if not staging_mv:
            error_msg = (
                f"‚ùå *No staging model found*\n\n"
                f"Model: `{MODEL_NAME}`\n"
                f"Expected alias: @{STAGING_ALIAS}\n\n"
                f"üí° *Next steps:*\n"
                f"1. Run 03_registration_dynamic.py\n"
                f"2. Run 04_uat_staging.py\n"
                f"3. Verify alias is set to '@{STAGING_ALIAS}'"
            )
            print(f"\n{error_msg}")
            send_slack_notification(error_msg, "error")
            exit_notebook_friendly(1)
            # Under IPython the call above returns; stop here explicitly.
            return

        # Step 2: Check UAT status
        uat_passed, uat_metrics = check_uat_status(staging_mv.version)
        if not uat_passed:
            warning_msg = (
                f"‚ö†Ô∏è *Model NOT promoted to production*\n\n"
                f"Model: `{MODEL_NAME}` ({MODEL_TYPE.upper()})\n"
                f"Version: v{staging_mv.version}\n"
                f"Reason: UAT not passed or results not found\n\n"
                f"üí° *Next steps:*\n"
                f"1. Run 05_uat_inference.py\n"
                f"2. Ensure model passes UAT validation\n"
                f"3. Re-run production promotion"
            )
            print(f"\n{warning_msg}")
            send_slack_notification(warning_msg, "warning")
            exit_notebook_friendly(0)
            # Must not fall through to promotion when UAT did not pass.
            return

        # Step 3: Promote to production
        success = promote_to_production(client, staging_mv, uat_metrics)

        if success:
            print(f"\n‚ú® Production promotion completed successfully!")
            print(f"\nüìå Next Step:")
            print(f"   Run 07_create_serving_endpoint.py to deploy serving endpoint")

            # Publish task values for downstream workflow tasks (best-effort).
            try:
                dbutils.jobs.taskValues.set(key="production_version", value=staging_mv.version)
                dbutils.jobs.taskValues.set(key="model_type", value=MODEL_TYPE)
                print("\n‚úÖ Task values saved for workflow")
            except Exception:
                # Covers a missing `dbutils` (NameError) and API failures, but
                # no longer swallows SystemExit/KeyboardInterrupt.
                print("\n‚ÑπÔ∏è Not running in workflow - skipping task values")

            exit_notebook_friendly(0)
        else:
            print(f"\n‚ùå Production promotion failed")
            exit_notebook_friendly(1)

    except Exception as e:
        error_msg = f"‚ùå Production promotion script failed: {str(e)}"
        print(f"\n{error_msg}")
        send_slack_notification(error_msg, "error")
        traceback.print_exc()
        exit_notebook_friendly(1)

# =============================================================================
# ✅ EXECUTE
# =============================================================================
if __name__ == "__main__":
    main()
