# MLflow Model Registry — P2T2 모델 버전 관리

> **Purpose:** P2T2 ÌååÏù¥ÌîÑÎùºÏù∏Ïùò AI Î™®Îç∏ÏùÑ Unity Catalog Model RegistryÏóê Îì±Î°ùÌï©ÎãàÎã§.
>
> **Îì±Î°ù Î™®Îç∏:**
> - `P2T2.ai_results.soap_generator` ‚Äî Azure OpenAI GPT-5.1 SOAP ÎÖ∏Ìä∏ ÏÉùÏÑ±
> - `P2T2.ai_results.biomedclip_matcher` ‚Äî BioMedCLIP ÏòÅÏÉÅ-ÌÖçÏä§Ìä∏ Îß§Ïπ≠
> - `P2T2.ai_results.judge_evaluator` ‚Äî LLM-as-a-Judge ÌíàÏßà ÌèâÍ∞Ä

## 0. 설정

In [None]:

import mlflow
import mlflow.pyfunc
from mlflow.tracking import MlflowClient
from mlflow.models.signature import ModelSignature
from mlflow.types.schema import Schema, ColSpec
from datetime import datetime
import json
import pandas as pd

# Select the P2T2 catalog for subsequent Spark SQL statements.
spark.sql("USE CATALOG P2T2")

# Point MLflow at the Unity Catalog model registry BEFORE creating the client.
# MlflowClient resolves its registry URI when it is constructed, so a client
# built earlier would keep using whatever registry was configured at that time.
mlflow.set_registry_uri("databricks-uc")
client = MlflowClient()

# Unity Catalog three-level model names: catalog.schema.model
REGISTRY = {
    "soap": "P2T2.ai_results.soap_generator",
    "clip": "P2T2.ai_results.biomedclip_matcher",
    "judge": "P2T2.ai_results.judge_evaluator",
}

# Required by Unity Catalog: an explicit input/output signature.
# The same signature is shared by all three wrapper models.
input_schema = Schema([ColSpec("string", "patient_id"), ColSpec("string", "input_text")])
output_schema = Schema([ColSpec("string", "result")])
SIGNATURE = ModelSignature(inputs=input_schema, outputs=output_schema)

# Echo the effective URIs so the notebook output shows which backends are in use.
print("üîß Model Registry ÏÑ§Ï†ï ÏôÑÎ£å")
print(f"   Tracking URI: {mlflow.get_tracking_uri()}")
print(f"   Registry URI: {mlflow.get_registry_uri()}")

## 1. PyFunc 래퍼 클래스 정의

In [None]:

class SOAPGeneratorModel(mlflow.pyfunc.PythonModel):
    """PyFunc wrapper for the Azure OpenAI based SOAP note generation model.

    As written, ``predict`` does not call any model: it produces one
    placeholder string per input row.
    """

    def load_context(self, context):
        """Store the deployment identifier on the instance.

        Args:
            context: MLflow model context (unused).
        """
        # Only recorded here; predict() does not read this attribute.
        self.model_version = "gpt-51-deploy"

    def predict(self, context, model_input):
        """Build a single-column ``result`` frame, one row per input row.

        Args:
            context: MLflow model context (unused).
            model_input: pandas DataFrame. ``patient_id`` is read from each
                row and falls back to ``'unknown'`` when the column is absent.

        Returns:
            pandas.DataFrame with exactly one column, ``result``. The column
            is present even when ``model_input`` has no rows, so the output
            always matches the declared one-column output schema.
        """
        notes = [
            f"SOAP note for {row.get('patient_id', 'unknown')}"
            for _, row in model_input.iterrows()
        ]
        # Building from a dict keeps the "result" column for empty input;
        # a list of zero row-dicts would yield a frame with no columns at all.
        return pd.DataFrame({"result": notes})

class BioMedCLIPModel(mlflow.pyfunc.PythonModel):
    """PyFunc wrapper for the BioMedCLIP image-text matching model.

    As written, ``predict`` does not run any matching: it produces one
    placeholder string per input row.
    """

    def load_context(self, context):
        """Store the underlying model name on the instance.

        Args:
            context: MLflow model context (unused).
        """
        # Only recorded here; predict() does not read this attribute.
        self.model_name = "BiomedCLIP-PubMedBERT_256"

    def predict(self, context, model_input):
        """Build a single-column ``result`` frame, one row per input row.

        Args:
            context: MLflow model context (unused).
            model_input: pandas DataFrame. ``patient_id`` is read from each
                row and falls back to ``'unknown'`` when the column is absent.

        Returns:
            pandas.DataFrame with exactly one column, ``result``. The column
            is present even when ``model_input`` has no rows, so the output
            always matches the declared one-column output schema.
        """
        matches = [
            f"BioMedCLIP match for {row.get('patient_id', 'unknown')}"
            for _, row in model_input.iterrows()
        ]
        # Building from a dict keeps the "result" column for empty input;
        # a list of zero row-dicts would yield a frame with no columns at all.
        return pd.DataFrame({"result": matches})

class JudgeEvaluatorModel(mlflow.pyfunc.PythonModel):
    """PyFunc wrapper for the LLM-as-a-Judge evaluation model.

    As written, ``predict`` does not run any evaluation: it produces one
    placeholder string per input row.
    """

    def load_context(self, context):
        """Store the judge deployment identifier on the instance.

        Args:
            context: MLflow model context (unused).
        """
        # Only recorded here; predict() does not read this attribute.
        self.judge_model = "gpt-51-deploy"

    def predict(self, context, model_input):
        """Build a single-column ``result`` frame, one row per input row.

        Args:
            context: MLflow model context (unused).
            model_input: pandas DataFrame. ``patient_id`` is read from each
                row and falls back to ``'unknown'`` when the column is absent.

        Returns:
            pandas.DataFrame with exactly one column, ``result``. The column
            is present even when ``model_input`` has no rows, so the output
            always matches the declared one-column output schema.
        """
        verdicts = [
            f"Judge eval for {row.get('patient_id', 'unknown')}"
            for _, row in model_input.iterrows()
        ]
        # Building from a dict keeps the "result" column for empty input;
        # a list of zero row-dicts would yield a frame with no columns at all.
        return pd.DataFrame({"result": verdicts})

# Notebook confirmation message printed once the three PyFunc wrapper classes are defined.
print("‚úÖ PyFunc ÎûòÌçº ÌÅ¥ÎûòÏä§ 3Í∞ú Ï†ïÏùò ÏôÑÎ£å")

## 2. 모델 로깅 및 Registry 등록

In [None]:

mlflow.set_experiment("/Shared/P2T2_Model_Registry")

# Maps each REGISTRY key to the run id of the run that logged and registered it.
registered_models = {}


def _register_model(key, run_name, source_table, params, metric_name, category, python_model):
    """Log one wrapper model in its own MLflow run and register it.

    Args:
        key: Key into ``REGISTRY`` selecting the registered model name.
        run_name: Name given to the MLflow run.
        source_table: Spark table whose row count is logged as a metric.
        params: Parameters logged on the run.
        metric_name: Name under which the row count of ``source_table`` is logged.
        category: Value of the ``model_category`` tag.
        python_model: ``mlflow.pyfunc.PythonModel`` instance to log.

    Returns:
        The Spark DataFrame read from ``source_table``, so the caller can keep
        a notebook-level reference to it.

    Side effects:
        Stores the run id in ``registered_models[key]`` and prints a
        confirmation line.
    """
    # Use the run handle returned by start_run rather than mlflow.active_run().
    with mlflow.start_run(run_name=run_name) as run:
        source_df = spark.table(source_table)
        mlflow.log_params(params)
        mlflow.log_metrics({metric_name: source_df.count()})
        mlflow.set_tags({"project": "P2T2", "model_category": category})
        # registered_model_name makes log_model also create a new version
        # under the given registry name.
        mlflow.pyfunc.log_model(
            artifact_path="model",
            python_model=python_model,
            signature=SIGNATURE,
            registered_model_name=REGISTRY[key],
        )
        registered_models[key] = run.info.run_id
        print(f"‚úÖ {REGISTRY[key]} Îì±Î°ù ÏôÑÎ£å")
    return source_df


# -- 1. SOAP Generator --
df_soap = _register_model(
    key="soap",
    run_name="soap_generator_registration",
    source_table="P2T2.ai_results.openai_soap_notes",
    params={"model_name": "gpt-51-deploy", "api_version": "2025-04-01-preview", "temperature": "0.3"},
    metric_name="total_notes_generated",
    category="llm_soap",
    python_model=SOAPGeneratorModel(),
)

# -- 2. BioMedCLIP --
df_clip = _register_model(
    key="clip",
    run_name="biomedclip_registration",
    source_table="P2T2.ai_results.biomedclip_results",
    params={"model_name": "BiomedCLIP-PubMedBERT_256", "embedding_dim": "512", "image_size": "224x224"},
    metric_name="total_matches",
    category="multimodal_clip",
    python_model=BioMedCLIPModel(),
)

# -- 3. Judge Evaluator --
df_judge = _register_model(
    key="judge",
    run_name="judge_evaluator_registration",
    source_table="P2T2.ai_results.judge_evaluation",
    params={"judge_model": "gpt-51-deploy", "criteria": "accuracy,completeness,safety,actionability,relevance", "scoring_scale": "1-5"},
    metric_name="total_evaluations",
    category="llm_judge",
    python_model=JudgeEvaluatorModel(),
)

print(f"\nüì¶ Ï¥ù {len(registered_models)}Í∞ú Î™®Îç∏ Îì±Î°ù ÏôÑÎ£å")

## 3. 모델 Alias 설정

In [None]:

# Point the "staging" alias of every registered model at its highest version.
# Best-effort: any failure for one model is reported and the loop moves on.
for name in REGISTRY.values():
    try:
        found = client.search_model_versions(f"name='{name}'")
        if found:
            # Versions are compared numerically, not as strings.
            newest = max(found, key=lambda mv: int(mv.version))
            client.set_registered_model_alias(
                name=name,
                alias="staging",
                version=newest.version,
            )
            print(f"‚úÖ {name} v{newest.version} ‚Üí @staging")
        else:
            # Nothing registered under this name yet, so there is nothing to alias.
            print(f"‚ö†Ô∏è {name}: Î≤ÑÏ†Ñ ÏóÜÏùå")
    except Exception as err:
        print(f"‚ÑπÔ∏è {name}: alias Ïä§ÌÇµ ({err})")

## 4. Registry 현황 리포트

In [None]:

# Print a status report: for each model, its version count and every version
# with its status and any aliases pointing at it.
print("=" * 70)
print("üì¶ P2T2 MLflow Model Registry ÌòÑÌô©")
print("=" * 70)

for name in REGISTRY.values():
    try:
        # Raises when the model does not exist, which routes to the except branch.
        model = client.get_registered_model(name)
        versions = client.search_model_versions(f"name='{name}'")
        # NOTE(review): assumes RegisteredModel.aliases is an alias -> version
        # mapping; confirm against the installed MLflow version.
        model_aliases = getattr(model, "aliases", None) or {}
        print(f"\n  üîπ {name}")
        print(f"     Ï¥ù Î≤ÑÏ†Ñ: {len(versions)}")
        for v in versions:
            # Version objects returned by a search may come back without their
            # aliases, so also consult the registered model's alias map.
            aliases = list(getattr(v, "aliases", None) or [])
            aliases.extend(
                alias
                for alias, target in model_aliases.items()
                if str(target) == str(v.version) and alias not in aliases
            )
            alias_str = f" (@{', @'.join(aliases)})" if aliases else ""
            print(f"     v{v.version}: {v.status}{alias_str}")
    except Exception:
        # Best-effort report: any lookup failure is shown as "not registered".
        print(f"\n  ‚ö™ {name}: ÎØ∏Îì±Î°ù")

print(f"\n{'=' * 70}")
print("  ‚úÖ ÏôÑÎ£å! Databricks UI ‚Üí Models ÌÉ≠ÏóêÏÑú ÌôïÏù∏ Í∞ÄÎä•")
print("=" * 70)