# In [0]:
import mlflow
import mlflow.sklearn
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from mlflow.models.signature import infer_signature
from mlflow.tracking import MlflowClient
from pyspark.sql import SparkSession
import os

# -----------------------------------------------------------------------------------
# Explicitly point both the MLflow tracking URI and the model-registry URI at
# Databricks so that no configuration error occurs.
# -----------------------------------------------------------------------------------
if "DATABRICKS_RUNTIME_VERSION" not in os.environ:
    # Outside Databricks the MLflow defaults are left untouched.
    print("⚠️ Not running in Databricks environment. Skipping tracking/registry URI setup.")
else:
    mlflow.set_tracking_uri("databricks")
    # Setting the registry URI is the key step: it prevents the
    # spark.mlflow.modelRegistryUri error, and "databricks-uc" selects
    # Unity Catalog instead of the legacy registry.
    mlflow.set_registry_uri("databricks-uc")
    print("✅ MLflow tracking and registry URIs set to Databricks Unity Catalog.")

# Read the Delta table through Spark and convert it to a pandas DataFrame.
spark_builder = SparkSession.builder.appName("ModelTraining")
spark = spark_builder.getOrCreate()
delta_reader = spark.read.format("delta")
df = delta_reader.table("house_price_delta").toPandas()
print(f"✅ Data loaded: {df.shape}")

# Model inputs (feature columns) and the regression target.
feature_columns = [
    "sq_feet",
    "num_bedrooms",
    "num_bathrooms",
    "year_built",
    "location_score",
]
target_column = "price"
X = df[feature_columns]
y = df[target_column]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Select the experiment under the workspace-level /Shared folder. If that
# fails for any reason, report the error and fall back to the same experiment
# name without the /Shared prefix.
experiment_name = "mlops_house_price_prediction_experiment"
try:
    mlflow.set_experiment(f"/Shared/{experiment_name}")
    print("✅ Experiment set successfully")
except Exception as setup_error:
    print(f"⚠️ Experiment setup warning: {setup_error}")
    mlflow.set_experiment(experiment_name)


# Train, evaluate and log the model inside a single MLflow run. A new
# registered-model version is created only when the tracked hyperparameters
# differ from those of the most recent registered version; otherwise the model
# is logged as a plain run artifact.
with mlflow.start_run() as run:
    print(f"🚀 MLflow run started: {run.info.run_id}")

    # Train model
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    # Predictions on the held-out split
    predictions = model.predict(X_test)

    # Metrics
    mse = mean_squared_error(y_test, predictions)
    r2 = r2_score(y_test, predictions)

    # Hyperparameters that identify this training configuration. They are read
    # back from the estimator so the logged values always match the model.
    model_params = model.get_params()
    tracked_params = {
        "n_estimators": model_params["n_estimators"],
        "random_state": model_params["random_state"],
    }

    # Log params and metrics
    for param_name, param_value in tracked_params.items():
        mlflow.log_param(param_name, param_value)
    mlflow.log_metric("mse", mse)
    mlflow.log_metric("r2_score", r2)

    print(f"📊 Metrics - MSE: {mse:.2f}, R²: {r2:.4f}")

    # Infer the signature from the inputs that actually produced `predictions`
    # (X_test), so the input/output example pair is consistent.
    signature = infer_signature(X_test, predictions)

    # ----------------------------------------------------
    # Model registration logic with parameter check
    # ----------------------------------------------------
    registered_model_name = "workspace.ml.house_price_model"
    # MLflow returns run params as strings, so compare in string form.
    current_params = {name: str(value) for name, value in tracked_params.items()}
    client = MlflowClient()
    should_register = True

    try:
        # get_latest_versions() is stage-based and is not supported by the
        # Unity Catalog registry, so look the versions up with
        # search_model_versions() and take the highest version number.
        existing_versions = client.search_model_versions(
            f"name='{registered_model_name}'"
        )
        if existing_versions:
            latest_version = max(existing_versions, key=lambda mv: int(mv.version))
            latest_run = client.get_run(latest_version.run_id)
            latest_params = latest_run.data.params

            # Compare only the tracked keys: the earlier run may carry extra
            # params (e.g. from autologging), which would make a whole-dict
            # equality check fail even when nothing relevant changed.
            params_unchanged = all(
                latest_params.get(name) == value
                for name, value in current_params.items()
            )
            if params_unchanged:
                should_register = False
                print("⏭️ Parameters are the same. Skipping model registration.")
            else:
                print("🆕 Parameters have changed. A new model version will be registered.")
        else:
            print("📝 First time registration. No existing model found.")
    except Exception as e:
        # Best effort: a lookup failure (for example because the model does not
        # exist yet) must not abort the run, so registration is still attempted.
        print(f"⚠️ Error checking for existing model: {e}")
        print("📝 Assuming this is a first-time registration or a permissions issue.")

    # Log the model according to the decision above.
    model_info = None
    if should_register:
        try:
            # Log the model and register it as a new version.
            model_info = mlflow.sklearn.log_model(
                sk_model=model,
                artifact_path="house_price_model",
                signature=signature,
                registered_model_name=registered_model_name,
            )
            print(f"✅ Model logged and registered successfully: {model_info.model_uri}")
        except Exception as reg_error:
            # The cause is not known here (permissions, connectivity, ...), so
            # report the error itself and fall through to artifact-only logging.
            print(f"❌ Error during model logging and registration: {reg_error}")
            print("⚠️ Registration failed. Logging model as a run artifact instead.")

    if model_info is None:
        # Either registration was skipped on purpose or it failed: log the
        # model as a run artifact only, without registering it.
        try:
            model_info = mlflow.sklearn.log_model(
                sk_model=model,
                artifact_path="house_price_model",
                signature=signature,
            )
            print(f"✅ Model logged successfully as a run artifact: {model_info.model_uri}")
        except Exception as log_error:
            print(f"❌ Model logging failed: {log_error}")
        
# Final report for the run. R² is printed with four decimals, matching the
# metrics line printed during the run, so a value just below the 0.7 threshold
# is not rounded up to "0.70" while being labelled "Needs Improvement".
separator = "=" * 50
performance_label = "Good" if r2 > 0.7 else "Needs Improvement"

print("\n" + separator)
print("🎯 TRAINING SUMMARY")
print(separator)
print(f"Run ID: {run.info.run_id}")
print(f"MSE: {mse:.2f}")
print(f"R² Score: {r2:.4f}")
print(f"Model Performance: {performance_label}")
print(separator)



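# NOTE: everything below is a commented-out alternative version of the cell
# above; none of it is executed.
# import mlflow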
# import mlflow.sklearn
# from sklearn.model_selection import train_test_split
# from sklearn.ensemble import RandomForestRegressor
# from sklearn.metrics import mean_squared_error, r2_score
# from mlflow.models.signature import infer_signature
# from mlflow.tracking import MlflowClient
# from pyspark.sql import SparkSession
# import os

# def setup_mlflow_config():
#     """Set up the MLflow configuration for Unity Catalog."""
#     try:
#         # Set the MLflow registry URI for Unity Catalog
#         mlflow.set_registry_uri("databricks-uc")
        
#         # Also set the tracking URI
#         if "DATABRICKS_RUNTIME_VERSION" in os.environ:
#             mlflow.set_tracking_uri("databricks")
        
#         print("✅ MLflow configuration setup completed")
#         return True
        
#     except Exception as e:
#         print(f"⚠️ MLflow config setup failed: {e}")
#         return False

# # Configuration setup
# config_success = setup_mlflow_config()

# # Initialize MLflow client
# try:
#     client = MlflowClient()
#     print("✅ MLflow client initialized successfully")
# except Exception as e:
#     print(f"❌ MLflow client initialization failed: {e}")
#     client = None

# # Load data from Delta table
# spark = SparkSession.builder.appName("ModelTraining").getOrCreate()
# df = spark.read.format("delta").table("house_price_delta").toPandas()
# print(f"✅ Data loaded: {df.shape}")

# # Features and target
# X = df[["sq_feet", "num_bedrooms", "num_bathrooms", "year_built", "location_score"]]
# y = df["price"]

# # Train-test split
# X_train, X_test, y_train, y_test = train_test_split(
#     X, y, test_size=0.2, random_state=42
# )

# # Set experiment path (workspace-level)
# try:
#     mlflow.set_experiment("/Shared/mlops_house_price_prediction_experiment")
#     print("✅ Experiment set successfully")
# except Exception as e:
#     print(f"⚠️ Experiment setup warning: {e}")
#     mlflow.set_experiment("mlops_house_price_prediction_experiment")

# with mlflow.start_run() as run:
#     print(f"🚀 MLflow run started: {run.info.run_id}")
    
#     # Train model
#     model = RandomForestRegressor(n_estimators=100, random_state=42)
#     model.fit(X_train, y_train)

#     # Predictions
#     predictions = model.predict(X_test)

#     # Metrics
#     mse = mean_squared_error(y_test, predictions)
#     r2 = r2_score(y_test, predictions)

#     # Log params and metrics
#     mlflow.log_param("n_estimators", 100)
#     mlflow.log_param("random_state", 42)
#     mlflow.log_metric("mse", mse)
#     mlflow.log_metric("r2_score", r2)
    
#     print(f"📊 Metrics - MSE: {mse:.2f}, R²: {r2:.4f}")

#     # Infer model signature
#     signature = infer_signature(X_train, predictions)
    
#     # Model registration logic
#     if config_success and client:
#         # Registered model name in Unity Catalog
#         registered_model_name = "workspace.ml.house_price_model"
#         current_params = {"n_estimators": 100, "random_state": 42}
        
#         should_register = True
        
#         try:
#             # Check whether the model already exists
#             latest_versions = client.get_latest_versions(registered_model_name)
            
#             if latest_versions:
#                 latest_version = latest_versions[0]
#                 run_info = client.get_run(latest_version.run_id)
#                 latest_params = run_info.data.params
                
#                 # Compare the parameters
#                 params_match = all(
#                     latest_params.get(key) == str(value) 
#                     for key, value in current_params.items()
#                 )
                
#                 if params_match:
#                     print("⏭️ Same parameters detected. Skipping registration.")
#                     should_register = False
                    
#         except Exception as e:
#             print(f"📝 First time registration (or error checking): {e}")
#             should_register = True
        
#         # Log and register the model
#         try:
#             if should_register:
#                 # Register the model in Unity Catalog
#                 model_info = mlflow.sklearn.log_model(
#                     sk_model=model,
#                     artifact_path="house_price_model",
#                     signature=signature,
#                     registered_model_name=registered_model_name,
#                     metadata={"training_dataset": "house_price_delta"}
#                 )
#                 print(f"✅ Model registered in Unity Catalog: {model_info.model_uri}")
                
#             else:
#                 # Log only; do not register
#                 model_info = mlflow.sklearn.log_model(
#                     sk_model=model,
#                     artifact_path="house_price_model", 
#                     signature=signature
#                 )
#                 print(f"📝 Model logged (without registration): {model_info.model_uri}")
                
#         except Exception as reg_error:
#             print(f"⚠️ Registration failed, trying without Unity Catalog: {reg_error}")
            
#             # Fallback: log without registration
#             try:
#                 model_info = mlflow.sklearn.log_model(
#                     sk_model=model,
#                     artifact_path="house_price_model",
#                     signature=signature
#                 )
#                 print(f"✅ Model logged successfully (fallback): {model_info.model_uri}")
                
#             except Exception as log_error:
#                 print(f"❌ Model logging also failed: {log_error}")
                
#     else:
#         # If the client or the config setup failed
#         print("⚠️ Using fallback logging method...")
#         try:
#             model_info = mlflow.sklearn.log_model(
#                 sk_model=model,
#                 artifact_path="house_price_model",
#                 signature=signature
#             )
#             print(f"✅ Model logged (fallback method): {model_info.model_uri}")
            
#         except Exception as fallback_error:
#             print(f"❌ Fallback logging failed: {fallback_error}")

# print("\n" + "="*50)
# print("🎯 TRAINING SUMMARY")
# print("="*50)
# print(f"Run ID: {run.info.run_id}")
# print(f"MSE: {mse:.2f}")
# print(f"R² Score: {r2:.4f}")
# print(f"Model Performance: {'Good' if r2 > 0.7 else 'Needs Improvement'}")
# print("="*50)
