In [None]:
# Databricks notebook source
import mlflow
from mlflow.tracking import MlflowClient
import sys
import os

# ==================== CONFIGURATION ====================

# IMPORTANT: this experiment name must match the name used in model_train.py.
EXPERIMENT_NAME = "/Shared/House_Price_Prediction_Delta_RF"

# WARNING: naming for Unity Catalog (MANDATORY).
# The registered model name is the three-part form <catalog>.<schema>.<model>.
UC_CATALOG_NAME = "workspace"
UC_SCHEMA_NAME = "ml"
REGISTERED_MODEL_NAME = f"{UC_CATALOG_NAME}.{UC_SCHEMA_NAME}.house_price_model_uc" 

# Artifact path appended to "runs:/<run_id>/" when building the model URI.
MODEL_ARTIFACT_PATH = "sklearn_rf_model"

# Model parameters used for the duplicate comparison
# (these use the 'best_' prefixes from model_train.py).
PARAM_KEYS = ['best_n_estimators', 'best_max_depth', 'best_min_samples_split', 'best_min_samples_leaf']

# Model metric used for the duplicate comparison ('test_rmse' from model_train.py).
METRIC_KEY = 'test_rmse' 
METRIC_TOLERANCE = 1e-6 # Acceptable absolute difference (tolerance) when comparing the metric

# ==================== FUNCTIONS ====================

def normalize_param_value(value):
	"""
	Convert a logged parameter value into a comparable Python value.

	MLflow sometimes stores parameters as strings, so the value is first
	turned into its string form and then parsed back:

	* ``None`` is returned unchanged.
	* Text made only of digits (with optional leading ``-`` and no ``.``)
	  becomes an ``int``.
	* Any other text that ``float()`` accepts becomes a ``float``.
	* Everything else is returned as the string form of the input.
	"""
	if value is None:
		return None

	text = str(value)

	# Integer-looking text is tried as int first; everything else goes to float.
	looks_like_int = '.' not in text and text.lstrip('-').isdigit()

	try:
		return int(text) if looks_like_int else float(text)
	except (ValueError, TypeError):
		# Not numeric: hand back the string form unchanged.
		return text


def get_latest_run_info(client: MlflowClient):
	"""
	Fetch the run ID, parameters and metric of the newest FINISHED run.

	The experiment named by EXPERIMENT_NAME is looked up, then the single most
	recently started run with status FINISHED is requested. Only the
	parameters listed in PARAM_KEYS (normalized with normalize_param_value)
	and the METRIC_KEY metric are extracted from it.

	Returns:
		A ``(run_id, params, metrics)`` tuple. If the experiment is missing, no
		finished run exists, or any exception is raised, a message is printed
		and ``(None, {}, {})`` is returned instead.
	"""
	nothing_found = (None, {}, {})

	try:
		experiment = client.get_experiment_by_name(EXPERIMENT_NAME)
		if not experiment:
			print(f"‚ùå Error: Experiment '{EXPERIMENT_NAME}' not found.")
			return nothing_found

		# Newest first, restricted to successfully finished runs, one result only.
		finished_runs = client.search_runs(
			experiment_ids=[experiment.experiment_id],
			filter_string="attributes.status = 'FINISHED'",
			order_by=["start_time DESC"],
			max_results=1,
		)

		if not finished_runs:
			print(f"‚ö† Warning: No FINISHED runs found in experiment '{EXPERIMENT_NAME}'.")
			return nothing_found

		newest = finished_runs[0]
		run_id = newest.info.run_id

		# Keep only the tracked 'best_' parameters that were actually logged.
		logged_params = newest.data.params
		run_params = {
			name: normalize_param_value(logged_params[name])
			for name in PARAM_KEYS
			if name in logged_params
		}

		# Keep only the comparison metric, if it was logged.
		logged_metrics = newest.data.metrics
		run_metrics = {}
		if METRIC_KEY in logged_metrics:
			run_metrics[METRIC_KEY] = logged_metrics[METRIC_KEY]

		print(f"‚úì Latest Run ID found: {run_id}")
		print(f"  Params: {run_params}")
		print(f"  Metrics: {run_metrics}")

		return run_id, run_params, run_metrics

	except Exception as e:
		print(f"‚ùå Error fetching latest run info: {e}")
		return nothing_found


def get_run_info_from_version(client: MlflowClient, model_name: str, version_number: str):
	"""
	Return the parameters and metric of the run behind a model version.

	The model version is fetched from the registry, its ``run_id`` is read,
	and that run's PARAM_KEYS parameters (normalized with
	normalize_param_value) and METRIC_KEY metric are collected.

	Returns:
		A ``(params, metrics)`` tuple of dicts. ``({}, {})`` is returned, after
		printing a warning, when the version has no run_id or any exception is
		raised while fetching.
	"""
	try:
		# The version record carries the ID of the run that produced it.
		source_run_id = client.get_model_version(model_name, version_number).run_id

		if not source_run_id:
			print(f"‚ö† Warning: No run_id found for version {version_number}")
			return {}, {}

		run_data = client.get_run(source_run_id).data

		# Tracked parameters that were logged on the run.
		logged_params = run_data.params
		run_params = {
			name: normalize_param_value(logged_params[name])
			for name in PARAM_KEYS
			if name in logged_params
		}

		# Comparison metric, if it was logged on the run.
		logged_metrics = run_data.metrics
		run_metrics = {}
		if METRIC_KEY in logged_metrics:
			run_metrics[METRIC_KEY] = logged_metrics[METRIC_KEY]

		return run_params, run_metrics

	except Exception as e:
		print(f"‚ö† Warning: Could not fetch info for version {version_number}: {e}")
		return {}, {}


def check_existing_version(client: MlflowClient, current_params: dict, current_metrics: dict):
	"""
	Check whether an equivalent model version is already registered.

	Every version of REGISTERED_MODEL_NAME is inspected. A version counts as a
	duplicate when all PARAM_KEYS parameters of its source run equal the
	current run's parameters AND its METRIC_KEY metric is within
	METRIC_TOLERANCE of the current run's metric.

	If the current run has no METRIC_KEY metric, the comparison falls back to
	parameters only, as the printed message announces. Previously a ``-1``
	sentinel forced the metric check to fail, so the fallback never took
	effect and no duplicate could ever be found in that case.

	Args:
		client: MLflow client used to query the model registry.
		current_params: Normalized parameters of the current run.
		current_metrics: Metrics of the current run (may lack METRIC_KEY).

	Returns:
		The matching model version object, or ``None`` when no duplicate is
		found, the model does not exist yet, or the registry lookup fails
		(a message is printed in each case).
	"""

	current_metric_value = current_metrics.get(METRIC_KEY)

	# None (rather than a numeric sentinel) marks "metric unavailable", so it
	# can never collide with a genuine metric value.
	params_only = current_metric_value is None

	if params_only:
		print(f"‚ùå Error: Required metric '{METRIC_KEY}' not found in current run. Cannot perform metric comparison.")
		# If the metric is missing, fall back to comparing parameters only.
		print("‚ÑπÔ∏è Falling back to parameters-only comparison.")
		metric_text = "N/A"
	else:
		metric_text = f"{current_metric_value:.6f}"

	print(f"\nüîç Checking for existing versions with parameters: {current_params} and metric ({METRIC_KEY}): {metric_text}")

	try:
		# Look up all versions of the model in Unity Catalog.
		filter_string = f"name = '{REGISTERED_MODEL_NAME}'"
		versions = client.search_model_versions(filter_string=filter_string)

		if not versions:
			print(f"‚ÑπÔ∏è Model '{REGISTERED_MODEL_NAME}' does not exist in Unity Catalog. Proceeding with registration.")
			return None

		print(f"üìã Found {len(versions)} existing version(s). Checking parameters and metrics...")

		for version in versions:
			try:
				# Parameters and metric of the run that produced this version.
				version_params, version_metrics = get_run_info_from_version(
					client,
					REGISTERED_MODEL_NAME,
					version.version
				)

				version_metric_value = version_metrics.get(METRIC_KEY)

				print(f" ¬† Version {version.version}: Params={version_params}, {METRIC_KEY}={version_metric_value}")

				# 1. Parameters must match exactly, and every tracked key must
				#    be present on the registered version's run.
				params_match = all(
					key in version_params and version_params[key] == current_params.get(key)
					for key in PARAM_KEYS
				)

				# 2. Metric must match within tolerance; skipped entirely in
				#    the parameters-only fallback.
				if params_only:
					metrics_match = True
				else:
					metrics_match = (
						version_metric_value is not None
						and abs(current_metric_value - version_metric_value) <= METRIC_TOLERANCE
					)

				if params_match and metrics_match:
					print("\n" + "#" * 70)
					print(f"‚è≠Ô∏è ¬†DUPLICATE DETECTED: Version {version.version} already exists")
					if params_only:
						print(f" ¬† Parameters match (metric comparison skipped: '{METRIC_KEY}' missing from current run).")
					else:
						print(f" ¬† Parameters AND Metric ({METRIC_KEY}) match within tolerance.")
						print(f" ¬† Existing {METRIC_KEY}: {version_metric_value:.6f} | Current {METRIC_KEY}: {current_metric_value:.6f}")
					print(f" ¬† ‚úÖ Skipping registration to avoid unnecessary versioning.")
					print("#" * 70 + "\n")
					return version

			except Exception as e:
				# A single unreadable version must not abort the whole scan.
				print(f" ¬† ‚ö† Error checking version {version.version}: {e}")
				continue

		print("‚úì No duplicate (by Params + Metric) found. Proceeding with new registration.\n")
		return None  # No duplicate found

	except Exception as e:
		error_msg = str(e).upper()
		if "RESOURCE_DOES_NOT_EXIST" in error_msg or "PERMISSION_DENIED" in error_msg:
			print(f"‚ÑπÔ∏è Model '{REGISTERED_MODEL_NAME}' not found or Permission Denied in UC.")
			print(f" \t Proceeding with registration as first version.\n")
			return None

		# Any other error: warn and let registration proceed (best effort).
		print(f"‚ö† Warning: Error checking existing versions in UC: {e}")
		print(f" \t Proceeding with registration (caution advised).\n")
		return None


def register_model_for_serving(client: MlflowClient, run_id: str, run_params: dict, run_metrics: dict, model_name: str, artifact_path: str):
	"""
	Register the model logged by an MLflow run, unless a duplicate exists.

	A duplicate check is performed first via check_existing_version; if it
	returns a version, that version is returned and nothing is registered.
	Otherwise ``runs:/<run_id>/<artifact_path>`` is registered under
	``model_name`` with ``mlflow.register_model``.

	Returns:
		The existing duplicate version or the newly created model version.

	Raises:
		SystemExit: with exit code 1 when registration (or printing its
		summary) raises any exception.
	"""

	# 1. Duplicate check: reuse an equivalent version when one exists.
	duplicate = check_existing_version(client, run_params, run_metrics)
	if duplicate:
		print(f"üéØ Using existing model version: {duplicate.name} v{duplicate.version}")
		return duplicate

	# 2. Registration of a new version from the run's logged artifact.
	source_uri = f"runs:/{run_id}/{artifact_path}"
	print(f"‚è≥ Attempting to register NEW model version from URI: {source_uri}")

	separator = "=" * 60

	try:
		# MLflow resolves the target registry (Unity Catalog) on its own.
		new_version = mlflow.register_model(model_uri=source_uri, name=model_name)

		print("\n" + separator)
		print("‚úÖ UC MODEL REGISTRATION SUCCESSFUL!")
		print(f"‡§®‡§æ‡§Æ: {new_version.name}")
		print(f"‡§∏‡§Ç‡§∏‡•ç‡§ï‡§∞‡§£ (Version): {new_version.version}")
		print(f"‡§∏‡•ç‡§ü‡•á‡§ú (Stage): {new_version.current_stage}")
		print(f"Parameters: {run_params}")
		print(f"Metrics ({METRIC_KEY}): {run_metrics.get(METRIC_KEY)}")
		print(separator + "\n")

		return new_version

	except Exception as e:
		# Registration failure is fatal for this script.
		print(f"‚ùå UC ‡§Æ‡•â‡§°‡§≤ ‡§™‡§Ç‡§ú‡•Ä‡§ï‡§∞‡§£ ‡§µ‡§ø‡§´‡§≤ ‡§π‡•Å‡§Ü! ‡§§‡•ç‡§∞‡•Å‡§ü‡§ø: {e}")
		print("‡§∏‡•Å‡§®‡§ø‡§∂‡•ç‡§ö‡§ø‡§§ ‡§ï‡§∞‡•á‡§Ç ‡§ï‡§ø ‡§Ü‡§™‡§ï‡•á ‡§™‡§æ‡§∏ Unity Catalog ‡§Æ‡•á‡§Ç ‡§Æ‡•â‡§°‡§≤ ‡§¨‡§®‡§æ‡§®‡•á ‡§ï‡•Ä ‡§Ö‡§®‡•Å‡§Æ‡§§‡§ø ‡§π‡•à‡•§")
		sys.exit(1)

if __name__ == "__main__":
	# Script entry point: locate the newest finished run, then register its
	# model in Unity Catalog unless an equivalent version already exists.

	print("\n" + "=" * 60)
	print("üöÄ MLFLOW MODEL REGISTRATION WITH DUPLICATE CHECK (Params + Metric)")
	print("=" * 60 + "\n")

	# Drop any old Workspace Registry configuration before registering in UC.
	try:
		if os.environ.pop("MLFLOW_REGISTRY_URI", None) is not None:
			print("‚úì Removed MLFLOW_REGISTRY_URI environment variable for UC registration.")
	except Exception as e:
		print(f"‚ö† Warning: Could not remove MLFLOW_REGISTRY_URI: {e}")

	# Initialize the MLflow client.
	client = MlflowClient()

	# 1. Automatically fetch the run ID, parameters and metrics
	#    ('best_n_estimators', 'test_rmse', etc.) of the latest finished run.
	RUN_ID, CURRENT_RUN_PARAMS, CURRENT_RUN_METRICS = get_latest_run_info(client)

	if not RUN_ID:
		print("‚ùå Registration aborted: Could not retrieve a valid Run ID.")
	else:
		# 2. Duplicate check and registration.
		registered_version = register_model_for_serving(
			client=client,
			run_id=RUN_ID,
			run_params=CURRENT_RUN_PARAMS,
			run_metrics=CURRENT_RUN_METRICS,
			model_name=REGISTERED_MODEL_NAME,
			artifact_path=MODEL_ARTIFACT_PATH,
		)

		if registered_version:
			print("\n‚úÖ Final Result:")
			print(f" \tModel: {registered_version.name}")
			print(f" \tVersion: {registered_version.version}")
			print("\nüí° ‡§Ö‡§ó‡§≤‡§æ ‡§ï‡§¶‡§Æ: ‡§á‡§∏ UC ‡§Æ‡•â‡§°‡§≤ ‡§∏‡§Ç‡§∏‡•ç‡§ï‡§∞‡§£ ‡§ï‡§æ ‡§â‡§™‡§Ø‡•ã‡§ó ‡§ï‡§∞‡§ï‡•á Serving Endpoint ‡§¨‡§®‡§æ‡§è‡§Å‡•§")
