In [None]:
# Databricks notebook source
# =============================================================================
# ‚úÖ FIXED UAT MODEL INFERENCE SCRIPT
# =============================================================================
# Hard-coded configuration matching training, registration, and evaluation scripts
# =============================================================================

# COMMAND ----------
%pip install xgboost requests

# COMMAND ----------
# üîÑ Restart Python to use updated packages
dbutils.library.restartPython()

# COMMAND ----------
import mlflow
from mlflow.tracking import MlflowClient
import pandas as pd
import numpy as np
import math
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from pyspark.sql import SparkSession
from datetime import datetime
import warnings
import sys
import os
import requests
import traceback

warnings.filterwarnings("ignore")

# =============================================================================
# ‚úÖ HARD-CODED CONFIGURATION (MUST MATCH ALL OTHER SCRIPTS!)
# =============================================================================
# These values are DIRECTLY from training_script.py and Model_Registration.ipynb

# --- Model registry coordinates (Unity Catalog three-level name) ---
UC_CATALOG = "workspace"
UC_SCHEMA = "ml"
MODEL_NAME = ".".join((UC_CATALOG, UC_SCHEMA, "house_price_xgboost_uc2"))
# Alias is capitalized to match what the registration script assigns.
STAGING_ALIAS = "Staging"

# --- UAT input data ---
DELTA_INPUT_TABLE = "workspace.default.house_price_delta"
FEATURE_COLS = [
    'sq_feet',
    'num_bedrooms',
    'num_bathrooms',
    'year_built',
    'location_score',
]
LABEL_COL = 'price'

# --- UAT acceptance thresholds ---
MAPE_THRESHOLD = 15.0  # upper bound on MAPE, in percent
R2_THRESHOLD = 0.75    # lower bound on the R-squared score

# --- Where each UAT run's metrics are appended ---
OUTPUT_TABLE = "workspace.default.uat_inference_house_price_xgboost"

# Echo the effective configuration once, one item per output line.
print(
    "=" * 80,
    "üöÄ UAT MODEL INFERENCE - FIXED VERSION",
    "=" * 80,
    f"\nüìã CONFIGURATION:",
    f"   Model Name: {MODEL_NAME}",
    f"   Staging Alias: @{STAGING_ALIAS}",
    f"   Input Table: {DELTA_INPUT_TABLE}",
    f"   Output Table: {OUTPUT_TABLE}",
    f"   Feature Columns: {FEATURE_COLS}",
    f"   MAPE Threshold: ‚â§ {MAPE_THRESHOLD}%",
    f"   R¬≤ Threshold: ‚â• {R2_THRESHOLD}",
    "=" * 80,
    sep="\n",
)

# =============================================================================
# ‚úÖ SLACK NOTIFICATION SETUP
# =============================================================================
def get_slack_webhook():
    """Look up the Slack webhook URL in the Databricks secret scopes.

    The scopes are tried in order; the first one that yields a non-blank
    value for the ``SLACK_WEBHOOK_URL`` key wins. Any error raised while
    reading a scope is reported and the next scope is tried.

    Returns:
        The webhook URL, or ``None`` when no scope provides one.
    """
    candidate_scopes = ("shared-scope", "dev-scope")
    for scope_name in candidate_scopes:
        try:
            url = dbutils.secrets.get(scope_name, "SLACK_WEBHOOK_URL")
            usable = bool(url) and bool(url.strip())
            if usable:
                print(f"‚úì Slack webhook configured from scope '{scope_name}'")
                return url
        except Exception as exc:
            print(f"‚ö†Ô∏è Slack webhook not found in scope '{scope_name}': {exc}")
    return None


# Resolved once at import time; send_slack_notification reads this global.
SLACK_WEBHOOK_URL = get_slack_webhook()

def send_slack_notification(message, level="info"):
    """Post ``message`` to Slack, prefixed with an icon chosen by ``level``.

    When no webhook URL was resolved the message is only printed. Delivery is
    best effort: HTTP failures and exceptions from the request are printed,
    never raised.

    Args:
        message: Text to send.
        level: One of "info", "success", "warning", "error"; any other value
            falls back to the "info" icon.
    """
    if SLACK_WEBHOOK_URL:
        icons = {
            "info": "‚ÑπÔ∏è",
            "success": "‚úÖ",
            "warning": "‚ö†Ô∏è",
            "error": "‚ùå",
        }
        icon = icons.get(level, '‚ÑπÔ∏è')
        payload = {"text": f"{icon} {message}"}

        try:
            reply = requests.post(SLACK_WEBHOOK_URL, json=payload, timeout=5)
            delivered = reply.status_code == 200
            if delivered:
                print(f"‚úÖ Slack notification sent: {level}")
            else:
                print(f"‚ö†Ô∏è Slack notification failed: {reply.status_code}")
        except Exception as exc:
            print(f"‚ö†Ô∏è Error sending Slack notification: {exc}")
    else:
        # No webhook available: surface the message on stdout instead.
        print(f"‚ö†Ô∏è Slack webhook not configured")
        print(f"üì¢ Message: {message}")

# =============================================================================
# ‚úÖ INITIALIZATION
# =============================================================================
# Module-level handles shared by every pipeline step below.
# Reuse the active Spark session if there is one, otherwise create it.
spark = SparkSession.builder.appName("UAT_Inference_Fixed").getOrCreate()
# Select the "databricks-uc" registry before the client is constructed.
mlflow.set_registry_uri("databricks-uc")
# Registry client used by load_staging_model for alias and version lookups.
client = MlflowClient()

print("\n‚úÖ MLflow and Spark initialized")

# =============================================================================
# ‚úÖ STEP 1: LOAD MODEL FROM STAGING ALIAS
# =============================================================================
def _find_staging_version_by_scan():
    """Fallback lookup: scan all registered versions for the staging alias.

    Used when the direct alias lookup fails. Every version of ``MODEL_NAME``
    is re-fetched individually to read its aliases, and the alias comparison
    is case-insensitive.

    Returns:
        The highest-numbered model version carrying the staging alias.

    Raises:
        ValueError: if the model has no versions, or none carries the alias.
    """
    model_versions = client.search_model_versions(f"name='{MODEL_NAME}'")

    if not model_versions:
        raise ValueError(
            f"‚ùå No model versions found for {MODEL_NAME}\n"
            f"üí° Solution: Run Model_Registration.ipynb first to register a model"
        )

    print(f"\nüîç Searching through {len(model_versions)} version(s)...")

    wanted_alias = STAGING_ALIAS.lower()
    fetched_versions = []  # kept so the debug listing needs no second round-trip
    staging_versions = []

    for v in model_versions:
        full_version = client.get_model_version(MODEL_NAME, v.version)
        fetched_versions.append(full_version)
        version_aliases = full_version.aliases if full_version.aliases else []

        # Case-insensitive comparison
        if any(alias.lower() == wanted_alias for alias in version_aliases):
            staging_versions.append(full_version)
            print(f"   ‚úì Version v{v.version} has @{STAGING_ALIAS} alias")

    if not staging_versions:
        # List (up to 10) available versions to help debugging.
        print(f"\n‚ùå No model with alias '@{STAGING_ALIAS}' found!")
        print(f"\nüìã Available versions for {MODEL_NAME}:")
        for full_v in fetched_versions[:10]:
            v_aliases = full_v.aliases if full_v.aliases else ["No aliases"]
            print(f"   Version v{full_v.version}: Aliases = {v_aliases}")

        raise ValueError(
            f"\n‚ùå No model with alias '@{STAGING_ALIAS}' found for {MODEL_NAME}\n"
            f"üí° Solution: Run Model_Evaluation.ipynb to promote a model to @{STAGING_ALIAS}"
        )

    latest = max(staging_versions, key=lambda mv: int(mv.version))

    print(f"\n‚úÖ Found {len(staging_versions)} version(s) with @{STAGING_ALIAS} alias")
    print(f"   Loading latest: v{latest.version}")
    return latest


def load_staging_model():
    """Resolve the staging model version and load it as a pyfunc model.

    Resolution first tries a direct alias lookup; if that raises, it falls
    back to scanning every version (see ``_find_staging_version_by_scan``).
    The model is then loaded by its resolved *version number* rather than by
    alias, so the loaded artifact is always the version that is reported and
    later logged, including when the fallback matched an alias that differs
    only in letter case.

    Returns:
        tuple: ``(model, version, run_id)``.

    Raises:
        Exception: any lookup or load error is printed with troubleshooting
        hints and a traceback, then re-raised unchanged.
    """
    print(f"\n{'='*80}")
    print(f"üìã STEP 1: Loading Model from @{STAGING_ALIAS}")
    print(f"{'='*80}")

    try:
        # Method 1: direct alias lookup
        print(f"‚è≥ Attempting to load: models:/{MODEL_NAME}@{STAGING_ALIAS}")

        try:
            model_version = client.get_model_version_by_alias(MODEL_NAME, STAGING_ALIAS)

            print(f"‚úÖ Found model with @{STAGING_ALIAS} alias")
            print(f"   Version: v{model_version.version}")
            print(f"   Run ID: {model_version.run_id}")

        except Exception as e:
            print(f"‚ö†Ô∏è Direct alias lookup failed: {e}")
            print(f"   Trying alternative search method...")

            # Method 2: search through all versions
            model_version = _find_staging_version_by_scan()

        version = model_version.version
        run_id = model_version.run_id

        # Pin the exact version that was resolved above. Loading by alias
        # here could fail (or pick another version) when the fallback path
        # was taken.
        model_uri = f"models:/{MODEL_NAME}/{version}"
        print(f"\n‚è≥ Loading model...")
        model = mlflow.pyfunc.load_model(model_uri)

        print(f"\n{'='*80}")
        print("‚úÖ MODEL LOADED SUCCESSFULLY")
        print(f"{'='*80}")
        print(f"   Model Name: {MODEL_NAME}")
        print(f"   Version: v{version}")
        print(f"   Run ID: {run_id}")
        print(f"   Status: {model_version.status}")

        # Training metric is optional; it is read from the version's tags.
        metric_tag = (model_version.tags or {}).get("metric_rmse", "N/A")
        print(f"   Training RMSE: {metric_tag}")
        print(f"{'='*80}\n")

        return model, version, run_id

    except Exception as e:
        print(f"\n{'='*80}")
        print("‚ùå FAILED TO LOAD MODEL")
        print(f"{'='*80}")
        print(f"Error: {e}")
        print(f"\nüí° Troubleshooting Steps:")
        print(f"   1. Verify model exists: {MODEL_NAME}")
        print(f"   2. Check if model is registered in Unity Catalog")
        print(f"   3. Run Model_Evaluation.ipynb to promote a model to @{STAGING_ALIAS}")
        print(f"   4. Verify alias is exactly '{STAGING_ALIAS}' (case-sensitive)")
        print(f"{'='*80}\n")
        traceback.print_exc()
        raise

# =============================================================================
# ‚úÖ STEP 2: LOAD UAT DATA
# =============================================================================
def load_uat_data():
    """Read the UAT dataset and split it into model inputs and ground truth.

    The table named by ``DELTA_INPUT_TABLE`` is converted to a pandas
    DataFrame, checked for every column in ``FEATURE_COLS`` plus
    ``LABEL_COL``, and then sliced.

    Returns:
        tuple: ``(df, X, y_true)`` - the full pandas frame, the feature
        columns only, and the label column.

    Raises:
        ValueError: if a feature column or the label column is absent.
        Exception: any other error is reported and re-raised unchanged.
    """
    rule = "=" * 80
    print(f"\n{rule}")
    print("üìã STEP 2: Loading UAT Data")
    print(rule)

    try:
        print(f"   Loading from: {DELTA_INPUT_TABLE}")
        df = spark.table(DELTA_INPUT_TABLE).toPandas()

        available = df.columns
        print(f"   Total rows: {len(df)}")
        print(f"   Columns: {list(available)}")

        # Schema check: all features first, then the label.
        absent = []
        for name in FEATURE_COLS:
            if name not in available:
                absent.append(name)
        if absent:
            raise ValueError(f"Missing feature columns: {absent}")

        if LABEL_COL not in available:
            raise ValueError(f"Missing label column: {LABEL_COL}")

        X, y_true = df[FEATURE_COLS], df[LABEL_COL]

        print(f"\n{rule}")
        print("‚úÖ DATA LOADED SUCCESSFULLY")
        print(rule)
        print(f"   Features shape: {X.shape}")
        print(f"   Labels shape: {y_true.shape}")
        print(f"   Sample features:\n{X.head(3)}")
        print(f"{rule}\n")

        return df, X, y_true

    except Exception as exc:
        detail = str(exc)
        print(f"\n{rule}")
        print("‚ùå FAILED TO LOAD DATA")
        print(rule)

        # A missing table gets targeted guidance; anything else is echoed.
        missing_table_markers = ("TABLE_OR_VIEW_NOT_FOUND", "cannot be found")
        if any(marker in detail for marker in missing_table_markers):
            print(f"   Delta table '{DELTA_INPUT_TABLE}' does not exist")
            print(f"\nüí° Solution:")
            print(f"   1. Create the table first")
            print(f"   2. Verify the table name: {DELTA_INPUT_TABLE}")
            print(f"   3. Check catalog and schema exist")
        else:
            print(f"   Error: {exc}")

        print(f"{rule}\n")
        traceback.print_exc()
        raise

# =============================================================================
# ‚úÖ STEP 3: RUN INFERENCE
# =============================================================================
def run_inference(model, X):
    """Score ``X`` with ``model`` and print a short summary of the output.

    Args:
        model: Object exposing ``predict(X)``.
        X: Feature rows to score; must support ``len()``.

    Returns:
        Whatever ``model.predict(X)`` returns. The summary additionally
        requires it to support slicing, ``len()``, ``.min()``, ``.max()``
        and ``.mean()``.

    Raises:
        Exception: any failure is printed with a traceback and re-raised.
    """
    rule = "=" * 80
    print(f"\n{rule}")
    print("üìã STEP 3: Running Inference")
    print(rule)

    try:
        sample_count = len(X)
        print(f"   Generating predictions for {sample_count} samples...")
        y_pred = model.predict(X)

        print(f"\n{rule}")
        print("‚úÖ INFERENCE COMPLETE")
        print(rule)
        print(f"   Predictions generated: {len(y_pred)}")
        print(f"   Sample predictions: {y_pred[:5]}")
        # Each statistic is computed right before its line is printed.
        for label, stat_name in (("Min", "min"), ("Max", "max"), ("Mean", "mean")):
            print(f"   {label} prediction: {getattr(y_pred, stat_name)():.2f}")
        print(f"{rule}\n")

        return y_pred

    except Exception as exc:
        print(f"\n{rule}")
        print("‚ùå INFERENCE FAILED")
        print(rule)
        print(f"   Error: {exc}")
        print(f"{rule}\n")
        traceback.print_exc()
        raise

# =============================================================================
# ‚úÖ STEP 4: CALCULATE METRICS
# =============================================================================
def evaluate_model(y_true, y_pred):
    """Compute MAE, RMSE, R-squared and MAPE for a set of predictions.

    Both inputs are first flattened to 1-D float arrays so the arithmetic is
    strictly positional; a pandas index on either side cannot realign (and
    silently drop) rows. MAPE is computed only over rows whose actual value
    is non-zero, because the percentage error is undefined there; the number
    of skipped rows is printed. If every actual value is zero, MAPE is NaN,
    which fails any ``<=`` threshold comparison downstream.

    Args:
        y_true: Actual label values (array-like).
        y_pred: Predicted values (array-like), one per actual value.

    Returns:
        tuple: ``(mae, rmse, r2, mape)`` with MAPE expressed in percent.

    Raises:
        ValueError: if the inputs are empty or differ in length.
        Exception: any failure is printed with a traceback and re-raised.
    """
    print(f"\n{'='*80}")
    print("üìã STEP 4: Evaluating Model Performance")
    print(f"{'='*80}")

    try:
        actual = np.asarray(y_true, dtype=float).ravel()
        predicted = np.asarray(y_pred, dtype=float).ravel()

        if actual.size == 0:
            raise ValueError("Cannot evaluate model: no samples provided")
        if actual.shape != predicted.shape:
            raise ValueError(
                f"Length mismatch: {actual.size} actual values vs "
                f"{predicted.size} predictions"
            )

        mae = mean_absolute_error(actual, predicted)
        rmse = math.sqrt(mean_squared_error(actual, predicted))
        r2 = r2_score(actual, predicted)

        # Percentage error is undefined where the actual value is zero.
        nonzero = actual != 0
        skipped = int(actual.size - nonzero.sum())
        if skipped:
            print(f"   Skipping {skipped} row(s) with zero actual value in MAPE")
        if nonzero.any():
            relative_error = (actual[nonzero] - predicted[nonzero]) / actual[nonzero]
            mape = float(np.mean(np.abs(relative_error))) * 100
        else:
            mape = float("nan")

        print(f"\nüìä Evaluation Metrics:")
        print(f"   MAE  : {mae:>12,.2f}")
        print(f"   RMSE : {rmse:>12,.2f}")
        print(f"   R¬≤   : {r2:>12.4f}")
        print(f"   MAPE : {mape:>12.2f}%")
        print(f"{'='*80}\n")

        return mae, rmse, r2, mape

    except Exception as e:
        print(f"\n‚ùå Evaluation failed: {e}")
        traceback.print_exc()
        raise

# =============================================================================
# ‚úÖ STEP 5: UAT VALIDATION
# =============================================================================
def validate_uat(mape, r2, model_version):
    """Compare the UAT metrics with the thresholds and announce the verdict.

    The run passes only when ``mape <= MAPE_THRESHOLD`` and
    ``r2 >= R2_THRESHOLD``. Either outcome is printed and sent through
    ``send_slack_notification``.

    Args:
        mape: Mean absolute percentage error, in percent.
        r2: R-squared score.
        model_version: Version label used in the messages.

    Returns:
        str: ``"PASSED"`` or ``"FAILED"``.
    """
    rule = "=" * 80
    print(f"\n{rule}")
    print("üìã STEP 5: UAT Validation")
    print(rule)

    print(f"\nüìè Validation Thresholds:")
    print(f"   MAPE: ‚â§ {MAPE_THRESHOLD}%")
    print(f"   R¬≤:   ‚â• {R2_THRESHOLD}")

    print(f"\nüìä Actual Performance:")
    mape_ok = mape <= MAPE_THRESHOLD
    r2_ok = r2 >= R2_THRESHOLD

    verdict = {True: "‚úÖ PASS", False: "‚ùå FAIL"}
    print(f"   MAPE: {mape:.2f}% {verdict[bool(mape_ok)]}")
    print(f"   R¬≤:   {r2:.4f}  {verdict[bool(r2_ok)]}")

    # Failure path first; the success path follows as the fall-through.
    if not (mape_ok and r2_ok):
        print(f"\n{rule}")
        print("‚ùå‚ùå UAT FAILED ‚ùå‚ùå")
        print(rule)

        checks = (
            (mape_ok, f"MAPE too high ({mape:.2f}% > {MAPE_THRESHOLD}%)"),
            (r2_ok, f"R¬≤ too low ({r2:.4f} < {R2_THRESHOLD})"),
        )
        problems = [text for ok, text in checks if not ok]

        print(f"   Failure reasons:")
        for problem in problems:
            print(f"   ‚Ä¢ {problem}")
        print(f"{rule}\n")

        send_slack_notification(
            f"‚ùå Model `{MODEL_NAME}` v{model_version} FAILED UAT\n"
            f"üìä MAPE: {mape:.2f}%, R¬≤: {r2:.4f}\n"
            f"üö´ Reasons: {', '.join(problems)}",
            level="error"
        )
        return "FAILED"

    print(f"\n{rule}")
    print("‚úÖ‚úÖ UAT PASSED ‚úÖ‚úÖ")
    print(rule)
    print(f"   Model v{model_version} is ready for production!")
    print(f"{rule}\n")

    send_slack_notification(
        f"‚úÖ Model `{MODEL_NAME}` v{model_version} PASSED UAT\n"
        f"üìä MAPE: {mape:.2f}%, R¬≤: {r2:.4f}\n"
        f"üöÄ Ready for production promotion!",
        level="success"
    )
    return "PASSED"

# =============================================================================
# ‚úÖ STEP 6: LOG RESULTS
# =============================================================================
def log_results(model_version, run_id, mae, rmse, r2, mape, status):
    """Append this UAT run's metrics to ``OUTPUT_TABLE`` (best effort).

    Before writing, the most recent logged row (newest ``timestamp``) is
    compared with the current run; if it has the same model version and
    effectively equal MAE and RMSE, nothing is written. Only that single row
    is fetched, so the results table is never pulled to the driver in full,
    and the comparison does not depend on the physical row order of the
    table scan.

    Logging never aborts the pipeline: any failure is printed with a
    traceback and swallowed.

    Args:
        model_version: Model version that was evaluated (convertible to int).
        run_id: MLflow run id of that version.
        mae, rmse, r2, mape: Metrics of this UAT run.
        status: UAT verdict string stored in the ``uat_status`` column.
    """
    print(f"\n{'='*80}")
    print("üìã STEP 6: Logging Results")
    print(f"{'='*80}")

    try:
        # Probe the table with an action so that a missing table is detected
        # here even when spark.table() itself resolves lazily.
        table_exists = False
        existing = None
        existing_rows = 0
        try:
            existing = spark.table(OUTPUT_TABLE)
            existing_rows = existing.count()
            table_exists = True
        except Exception:
            print(f"   Table exists: No (will be created)")

        if table_exists:
            print(f"   Table exists: Yes")
            print(f"   Existing rows: {existing_rows}")

        if table_exists and existing_rows > 0:
            # A failed duplicate check (e.g. an older table schema) must not
            # be mistaken for a missing table, and must not block logging.
            try:
                newest = (
                    existing.orderBy("timestamp", ascending=False)
                    .limit(1)
                    .collect()
                )
                if newest:
                    last = newest[0]
                    is_duplicate = (
                        int(last["model_version"]) == int(model_version) and
                        math.isclose(float(last["mae"]), mae, rel_tol=1e-6) and
                        math.isclose(float(last["rmse"]), rmse, rel_tol=1e-6)
                    )

                    if is_duplicate:
                        print("\n   ‚ÑπÔ∏è Duplicate entry detected - skipping log")
                        return
            except Exception as e:
                print(f"   ‚ö†Ô∏è Duplicate check skipped: {e}")

        # Prepare result data
        result_df = pd.DataFrame([{
            "timestamp": datetime.now(),
            "model_name": MODEL_NAME,
            "model_version": int(model_version),
            "run_id": run_id,
            "mae": float(mae),
            "rmse": float(rmse),
            "r2": float(r2),
            "mape": float(mape),
            "uat_status": status
        }])

        # Write to Delta; schema merging is only requested for an existing table.
        writer = spark.createDataFrame(result_df).write.mode("append")
        if table_exists:
            writer = writer.option("mergeSchema", "true")
        writer.saveAsTable(OUTPUT_TABLE)

        print(f"\n{'='*80}")
        print("‚úÖ RESULTS LOGGED SUCCESSFULLY")
        print(f"{'='*80}")
        print(f"   Output Table: {OUTPUT_TABLE}")
        print(f"   Model Version: v{model_version}")
        print(f"   UAT Status: {status}")
        print(f"{'='*80}\n")

    except Exception as e:
        # Deliberately best effort: report, but do not fail the UAT run.
        print(f"\n‚ö†Ô∏è Failed to log results: {e}")
        traceback.print_exc()
# =============================================================================
# ‚úÖ MAIN EXECUTION
# =============================================================================
def main():
    """Run the six UAT steps in order and print a final summary.

    Steps: load the staging model, load the UAT data, score it, compute the
    metrics, validate them against the thresholds, and log the outcome.
    The summary is printed for a PASSED and a FAILED verdict alike; only an
    exception from one of the steps ends the process with exit code 1
    (after a Slack notification).
    """
    bar = "=" * 80
    try:
        print(f"\n{bar}")
        print("üé¨ STARTING UAT INFERENCE PIPELINE")
        print(f"{bar}\n")

        # Pipeline steps, each feeding the next.
        model, model_version, run_id = load_staging_model()
        df, X, y_true = load_uat_data()
        y_pred = run_inference(model, X)
        mae, rmse, r2, mape = evaluate_model(y_true, y_pred)
        status = validate_uat(mape, r2, model_version)
        log_results(model_version, run_id, mae, rmse, r2, mape, status)

        # Final summary
        summary_lines = [
            f"\n{bar}",
            "‚ú® UAT INFERENCE COMPLETED SUCCESSFULLY ‚ú®",
            bar,
            f"\nüìä Final Summary:",
            f"   Model: {MODEL_NAME}",
            f"   Version: v{model_version}",
            f"   Run ID: {run_id}",
            f"   UAT Status: {status}",
            f"   Metrics:",
            f"     ‚Ä¢ RMSE: {rmse:,.2f}",
            f"     ‚Ä¢ MAPE: {mape:.2f}%",
            f"     ‚Ä¢ R¬≤:   {r2:.4f}",
            f"     ‚Ä¢ MAE:  {mae:,.2f}",
            f"{bar}\n",
        ]
        print("\n".join(summary_lines))

        # The verdict is only reported here; the process is not terminated.
        print(f"\n‚úÖ UAT pipeline completed with status: {status}")

    except Exception as exc:
        reason = str(exc)
        print(f"\n{bar}")
        print("‚ùå UAT INFERENCE FAILED")
        print(bar)
        print(f"Error: {reason}")
        print(f"{bar}\n")

        send_slack_notification(
            f"‚ùå UAT pipeline failed for `{MODEL_NAME}`\n"
            f"Error: {reason}",
            level="error"
        )

        sys.exit(1)


# =============================================================================
# EXECUTE
# =============================================================================
if __name__ == "__main__":
    main()





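# -----------------------------------------------------------------------------
# NOTE: Everything from here to the end of the file is the previous
# ("aligned") version of this script, kept commented out for reference only.
# None of it is executed; the live pipeline is the code above.
# -----------------------------------------------------------------------------
# # Databricks notebook source
# # =============================================================
# # ✅ UAT MODEL INFERENCE SCRIPT (FINAL ALIGNED VERSION)
# # =============================================================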
# # COMMAND ----------
# %pip install xgboost

# # COMMAND ----------
# import mlflow
# from mlflow.tracking import MlflowClient
# import pandas as pd
# import numpy as np
# import math
# from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
# from pyspark.sql import SparkSession
# from datetime import datetime
# import warnings
# import sys
# import os

# warnings.filterwarnings("ignore")

# # =============================================================
# # ‚úÖ CONFIGURATION (ALIGNED WITH REGISTRATION & STAGING SCRIPTS)
# # =============================================================
# UC_CATALOG = "workspace"
# UC_SCHEMA = "ml"
# MODEL_NAME = f"{UC_CATALOG}.{UC_SCHEMA}.house_price_xgboost_uc2"
# STAGING_ALIAS = "Staging"

# # Delta input table for UAT inference
# DELTA_INPUT_TABLE = "workspace.default.house_price_delta"

# # Feature columns (must match training script)
# FEATURE_COLS = ['sq_feet', 'num_bedrooms', 'num_bathrooms', 'year_built', 'location_score']
# LABEL_COL = 'price'

# # Thresholds for validation
# MAPE_THRESHOLD = 15.0   # target < 15%
# R2_THRESHOLD   = 0.75   # target > 0.75

# # Output table for UAT results
# OUTPUT_TABLE = "workspace.default.uat_inference_house_price_xgboost"


# # =============================================================
# # ‚úÖ INITIALIZATION
# # =============================================================
# print("="*80)
# print("üöÄ UAT MODEL INFERENCE - ALIGNED VERSION")
# print("="*80)

# spark = SparkSession.builder.appName("UAT_Inference_Aligned").getOrCreate()
# mlflow.set_registry_uri("databricks-uc")
# client = MlflowClient()

# print(f"\nüìã Configuration:")
# print(f"   Model: {MODEL_NAME}")
# print(f"   Alias: {STAGING_ALIAS}")
# print(f"   Input Table: {DELTA_INPUT_TABLE}")
# print(f"   Output Table: {OUTPUT_TABLE}")
# print(f"   Feature Columns: {FEATURE_COLS}")


# # =============================================================
# # ‚úÖ 1Ô∏è‚É£ Load model from STAGING alias
# # =============================================================
# def load_staging_model(client, model_name, alias):
#     """
#     Load model from Unity Catalog using alias (aligned with staging script)
#     """
#     print(f"\n{'='*70}")
#     print(f"üìã STEP 1: Loading Model from @{alias}")
#     print(f"{'='*70}")
    
#     try:
#         # Search for model versions with staging alias
#         model_versions = client.search_model_versions(f"name='{model_name}'")
        
#         # Filter versions that have the staging alias
#         staging_versions = []
#         for v in model_versions:
#             full_version = client.get_model_version(model_name, v.version)
#             aliases = [a.lower() for a in full_version.aliases] if full_version.aliases else []
#             if alias.lower() in aliases:
#                 staging_versions.append(full_version)
        
#         if not staging_versions:
#             raise ValueError(f"No model with alias '{alias}' found for {model_name}")
        
#         # Get latest version from staging
#         latest_staging = max(staging_versions, key=lambda x: int(x.version))
#         version = latest_staging.version
#         run_id = latest_staging.run_id
        
#         print(f"   Found {len(staging_versions)} version(s) with @{alias} alias")
#         print(f"   Loading version: v{version}")
        
#         model_uri = f"models:/{model_name}@{alias}"
#         model = mlflow.pyfunc.load_model(model_uri)

#         print(f"\n‚úÖ Model Loaded Successfully!")
#         print(f"   Version: v{version}")
#         print(f"   Run ID: {run_id}")
#         print(f"   Status: {latest_staging.status}")
        
#         # Get metric from tags if available
#         metric_tag = latest_staging.tags.get("metric_rmse", "N/A")
#         print(f"   Training RMSE: {metric_tag}")
        
#         return model, version, run_id

#     except Exception as e:
#         print(f"\n‚ùå Failed to load model from {alias}: {e}")
#         import traceback
#         traceback.print_exc()
#         raise ValueError(f"Model loading failed: {e}")


# # =============================================================
# # ‚úÖ 2Ô∏è‚É£ Load Delta table for inference
# # =============================================================
# def load_data(spark):
#     """
#     Load UAT data from Delta table with proper feature selection
#     """
#     print(f"\n{'='*70}")
#     print("üìã STEP 2: Loading UAT Data")
#     print(f"{'='*70}")
    
#     try:
#         print(f"   Loading from: {DELTA_INPUT_TABLE}")
#         df_spark = spark.table(DELTA_INPUT_TABLE)
#         df = df_spark.toPandas()

#         print(f"   Total rows loaded: {len(df)}")
#         print(f"   Columns: {list(df.columns)}")

#         # Validate required columns exist
#         missing_features = [col for col in FEATURE_COLS if col not in df.columns]
#         if missing_features:
#             raise ValueError(f"Missing feature columns: {missing_features}")

#         if LABEL_COL not in df.columns:
#             raise ValueError(f"Missing label column: {LABEL_COL}")

#         # Select only required features and label
#         X = df[FEATURE_COLS]
#         y_true = df[LABEL_COL]

#         print(f"\n‚úÖ Data Loaded Successfully!")
#         print(f"   Features shape: {X.shape}")
#         print(f"   Labels shape: {y_true.shape}")
        
#         return df, X, y_true

#     except Exception as e:
#         error_msg = str(e)
#         if "TABLE_OR_VIEW_NOT_FOUND" in error_msg or "cannot be found" in error_msg:
#             print(f"\n‚ùå Delta table '{DELTA_INPUT_TABLE}' does not exist.")
#             print(f"   Please create the table first or verify the table name.")
#             print(f"   Expected format: catalog.schema.table_name")
#         else:
#             print(f"\n‚ùå Failed to load input table: {e}")
#         import traceback
#         traceback.print_exc()
#         raise ValueError(f"Data loading failed: {e}")


# # =============================================================
# # ‚úÖ 3Ô∏è‚É£ Run inference
# # =============================================================
# def run_inference(model, X):
#     """
#     Run model inference on UAT data
#     """
#     print(f"\n{'='*70}")
#     print("üìã STEP 3: Running Inference")
#     print(f"{'='*70}")
    
#     try:
#         print(f"   Running predictions on {len(X)} samples...")
#         y_pred = model.predict(X)
        
#         print(f"\n‚úÖ Inference Complete!")
#         print(f"   Predictions generated: {len(y_pred)}")
#         print(f"   Sample predictions: {y_pred[:5]}")
        
#         return y_pred
        
#     except Exception as e:
#         print(f"\n‚ùå Inference failed: {e}")
#         import traceback
#         traceback.print_exc()
#         raise


# # =============================================================
# # ‚úÖ 4Ô∏è‚É£ Calculate metrics
# # =============================================================
# def evaluate(y_true, y_pred):
#     """
#     Calculate evaluation metrics for UAT
#     """
#     print(f"\n{'='*70}")
#     print("üìã STEP 4: Evaluating Model Performance")
#     print(f"{'='*70}")
    
#     try:
#         mae = mean_absolute_error(y_true, y_pred)
#         rmse = math.sqrt(mean_squared_error(y_true, y_pred))
#         r2 = r2_score(y_true, y_pred)
#         mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100

#         print(f"\nüìä Evaluation Metrics:")
#         print(f"   MAE  : {mae:.3f}")
#         print(f"   RMSE : {rmse:.3f}")
#         print(f"   R¬≤   : {r2:.3f}")
#         print(f"   MAPE : {mape:.2f}%")
        
#         return mae, rmse, r2, mape
        
#     except Exception as e:
#         print(f"\n‚ùå Evaluation failed: {e}")
#         raise


# # =============================================================
# # ‚úÖ 5Ô∏è‚É£ Threshold validation (UAT pass/fail)
# # =============================================================
# def validate(mape, r2):
#     """
#     Validate model performance against UAT thresholds
#     """
#     print(f"\n{'='*70}")
#     print("üìã STEP 5: UAT Validation")
#     print(f"{'='*70}")
    
#     print(f"\nüìè Validation Thresholds:")
#     print(f"   MAPE threshold: ‚â§ {MAPE_THRESHOLD}%")
#     print(f"   R¬≤ threshold:   ‚â• {R2_THRESHOLD}")
    
#     print(f"\nüìä Actual Performance:")
#     print(f"   MAPE: {mape:.2f}% {'‚úÖ' if mape <= MAPE_THRESHOLD else '‚ùå'}")
#     print(f"   R¬≤:   {r2:.3f}  {'‚úÖ' if r2 >= R2_THRESHOLD else '‚ùå'}")
    
#     if mape <= MAPE_THRESHOLD and r2 >= R2_THRESHOLD:
#         print(f"\n{'='*70}")
#         print("‚úÖ‚úÖ UAT PASSED ‚úÖ‚úÖ")
#         print(f"{'='*70}")
#         return "PASSED"
#     else:
#         print(f"\n{'='*70}")
#         print("‚ùå‚ùå UAT FAILED ‚ùå‚ùå")
#         print(f"{'='*70}")
        
#         # Show which criteria failed
#         if mape > MAPE_THRESHOLD:
#             print(f"   ‚ö†Ô∏è MAPE too high: {mape:.2f}% > {MAPE_THRESHOLD}%")
#         if r2 < R2_THRESHOLD:
#             print(f"   ‚ö†Ô∏è R¬≤ too low: {r2:.3f} < {R2_THRESHOLD}")
        
#         return "FAILED"


# # =============================================================
# # ‚úÖ 6Ô∏è‚É£ Log results to Delta table (with smart schema handling)
# # =============================================================
# def log_results(spark, model_name, model_version, run_id, mae, rmse, r2, mape, status):
#     """
#     Log UAT results to Delta table with duplicate prevention and backward compatibility
#     """
#     print(f"\n{'='*70}")
#     print("üìã STEP 6: Logging Results")
#     print(f"{'='*70}")
    
#     try:
#         # Check if table exists and its schema
#         table_exists = False
        
#         try:
#             existing = spark.table(OUTPUT_TABLE)
#             table_exists = True
#             existing_df = existing.toPandas()
#             print(f"   Table exists: Yes")
#             print(f"   Existing rows: {len(existing_df)}")
            
#             # Check for duplicates
#             if not existing_df.empty:
#                 last = existing_df.iloc[-1]
                
#                 # Check if metrics are identical to last run
#                 is_duplicate = (
#                     int(last.model_version) == int(model_version) and
#                     math.isclose(float(last.mae), mae, rel_tol=1e-6) and
#                     math.isclose(float(last.rmse), rmse, rel_tol=1e-6) and
#                     math.isclose(float(last.r2), r2, rel_tol=1e-6) and
#                     math.isclose(float(last.mape), mape, rel_tol=1e-6)
#                 )
                
#                 if is_duplicate:
#                     print("\n‚ÑπÔ∏è Duplicate Entry Detected")
#                     print("   Metrics unchanged from last run ‚Üí Skipping log")
#                     return
                    
#         except Exception as e:
#             print(f"   Table exists: No (will be created)")
#             print(f"   Note: {e}")
        
#         # Prepare data for logging
#         result_df = pd.DataFrame([{
#             "timestamp": datetime.now(),
#             "model_name": model_name,
#             "model_version": int(model_version),
#             "run_id": run_id,
#             "mae": float(mae),
#             "rmse": float(rmse),
#             "r2": float(r2),
#             "mape": float(mape),
#             "uat_status": status
#         }])

#         # Write to Delta table
#         spark_df = spark.createDataFrame(result_df)
        
#         if table_exists:
#             # Table exists - append with schema evolution if needed
#             spark_df.write.mode("append").option("mergeSchema", "true").saveAsTable(OUTPUT_TABLE)
#         else:
#             # New table - create it
#             spark_df.write.mode("append").saveAsTable(OUTPUT_TABLE)

#         print(f"\n‚úÖ Results Logged Successfully!")
#         print(f"   Output Table: {OUTPUT_TABLE}")
#         print(f"   Model Name: {model_name}")
#         print(f"   Model Version: v{model_version}")
#         print(f"   Run ID: {run_id}")
#         print(f"   UAT Status: {status}")

#     except Exception as e:
#         print(f"\n‚ùå Failed to log results: {e}")
#         import traceback
#         traceback.print_exc()
#         raise


# # =============================================================
# # ‚úÖ MAIN EXECUTION FLOW
# # =============================================================
# def main():
#     """
#     Main execution flow for UAT inference
#     """
#     try:
#         print("\n" + "="*80)
#         print("üé¨ STARTING UAT INFERENCE PIPELINE")
#         print("="*80)
        
#         # Step 1: Load model
#         model, model_version, run_id = load_staging_model(client, MODEL_NAME, STAGING_ALIAS)
        
#         # Step 2: Load data
#         df, X, y_true = load_data(spark)
        
#         # Step 3: Run inference
#         y_pred = run_inference(model, X)
        
#         # Step 4: Evaluate
#         mae, rmse, r2, mape = evaluate(y_true, y_pred)
        
#         # Step 5: Validate
#         status = validate(mape, r2)
        
#         # Step 6: Log results
#         log_results(spark, MODEL_NAME, model_version, run_id, mae, rmse, r2, mape, status)

#         print("\n" + "="*80)
#         print("‚ú® UAT INFERENCE COMPLETED SUCCESSFULLY ‚ú®")
#         print("="*80)
#         print(f"\nüìä Summary:")
#         print(f"   Model: {MODEL_NAME}")
#         print(f"   Version: v{model_version}")
#         print(f"   Run ID: {run_id}")
#         print(f"   UAT Status: {status}")
#         print(f"   RMSE: {rmse:.3f}")
#         print(f"   MAPE: {mape:.2f}%")
#         print(f"   R¬≤: {r2:.3f}")
#         print("="*80 + "\n")

#     except Exception as e:
#         print("\n" + "="*80)
#         print("‚ùå UAT INFERENCE FAILED")
#         print("="*80)
#         print(f"Error: {str(e)}")
#         print("="*80 + "\n")
#         sys.exit(1)


# # =============================================================
# # ‚úÖ EXECUTE
# # =============================================================
# if __name__ == "__main__":
#     main()