In [None]:
import logging
import time

# Configure the notebook-wide logger.
# logging.getLogger returns the same object for a given name on every call, so
# this cell must be safe to execute more than once in the same kernel.
logger: logging.Logger = logging.getLogger("register_model_logger")
logger.setLevel(logging.INFO)
logger.propagate = False  # Prevent duplicate logs from parent loggers

# Timestamped "date - LEVEL - message" layout for every record
formatter: logging.Formatter = logging.Formatter(
    fmt="%(asctime)s - %(levelname)s - %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S"
)

# Detach handlers left behind by a previous run of this cell; without this,
# each re-run adds another StreamHandler and every message is printed N times.
for stale_handler in list(logger.handlers):
    logger.removeHandler(stale_handler)

# Configure and attach a single stream handler
stream_handler: logging.StreamHandler = logging.StreamHandler()
stream_handler.setFormatter(formatter)
logger.addHandler(stream_handler)

In [None]:
# Wall-clock timestamp taken at the start of the run; the last cell subtracts
# it from a second time.time() reading to report the total execution time.
start_time: float = time.time()  

logger.info("Model registration notebook execution started.")

<h1 style="text-align: center; font-size: 50px;"> Fine-Tuned Model Registration Service </h1>

This notebook demonstrates how to register a fine-tuned LLM comparison service that allows switching between base and fine-tuned models through a single MLflow endpoint. This follows the same pattern used across all AI-Blueprints for consistent model deployment and serving.

In [None]:
!pip install -r ../requirements.txt --quiet

In [None]:
import os
import sys
import yaml
from pathlib import Path
import logging
import warnings
import mlflow

# Make the parent of the current working directory importable so the `core`
# and `src` packages imported below can be resolved. The membership check keeps
# sys.path free of duplicate entries when this cell is executed again.
project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))
if project_root not in sys.path:
    sys.path.append(project_root)

# ===============================
# 🚀 Deployment & Registration
# ===============================
from core.deploy.deploy_fine_tuning import register_llm_comparison_model

# ===============================
# ⚙️ Utility Functions
# ===============================
from src.utils import (
    load_config_and_secrets,
    configure_proxy,
    get_configs_dir,
    get_fine_tuned_models_dir
)

## Configuration

In [None]:
# Suppress Python warnings
warnings.filterwarnings("ignore")

In [None]:
# --- MLflow identifiers used when the service is logged and registered ---
MLFLOW_EXPERIMENT_NAME = "AIStudio-Fine-Tuning-Experiment"
MODEL_SERVICE_RUN_NAME = "AIStudio-Fine-Tuning-Service-Run"
MODEL_SERVICE_NAME = "AIStudio-Fine-Tuning-Model"

# --- Models to compare: edit both values to match your own training run ---
BASE_MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # base model identifier
FINE_TUNED_MODEL_NAME = "Orpo-TinyLlama-1.1B-Chat-v1.0-FT"  # fine-tuned model name

# --- Locations of the YAML files inside the configs directory, as strings ---
CONFIG_PATH = str(get_configs_dir() / "config.yaml")
SECRETS_PATH = str(get_configs_dir() / "secrets.yaml")

# Echo the selected models so the choice is visible in the cell output
logger.info(f"Base model: {BASE_MODEL}")
logger.info(f"Fine-tuned model: {FINE_TUNED_MODEL_NAME}")

In [None]:
# Read the config and secrets YAML files from the paths defined in the
# configuration cell. `secrets` is not referenced again in this notebook's
# visible cells.
config, secrets = load_config_and_secrets(CONFIG_PATH, SECRETS_PATH)
# Hand the loaded config to the proxy helper.
# NOTE(review): presumably a no-op when the config defines no proxy — confirm in src.utils.
configure_proxy(config)

logger.info("✅ Configuration loaded successfully")

## Verify Model Assets

Before registering the models, let's verify that both the base model and fine-tuned model are accessible.

In [None]:
def verify_model_assets() -> bool:
    """Check that the fine-tuned model exists on disk.

    The expected location is ``get_fine_tuned_models_dir() / FINE_TUNED_MODEL_NAME``.
    The base model is only logged, never checked, because it is treated as a
    model ID rather than a local path.

    Returns:
        True when the fine-tuned model path exists, False otherwise.
    """
    model_location = get_fine_tuned_models_dir() / FINE_TUNED_MODEL_NAME

    # Guard clause: bail out early when the fine-tuned artefacts are missing
    if not model_location.exists():
        logger.warning(f"⚠️ Fine-tuned model not found: {model_location}")
        logger.info("Please run the run-workflow.ipynb notebook first to create the fine-tuned model")
        return False

    logger.info(f"✅ Fine-tuned model found: {model_location}")

    # Nothing to verify for the base model; just report which ID will be used
    logger.info(f"✅ Base model ID: {BASE_MODEL}")
    return True


# Run the check once; the registration cell branches on this flag
assets_verified = verify_model_assets()

## Model Registration Service

This section demonstrates how to register the LLM comparison model that allows switching between the base and fine-tuned models through a single API endpoint. The service is automatically documented using Swagger (via MLflow) and provides:

- **Base Model Inference**: Access to the original pre-trained model
- **Fine-Tuned Model Inference**: Access to the ORPO fine-tuned model  
- **Comparison Mode**: Switch between models using the `use_finetuning` parameter
- **Flexible Input**: Support for custom prompts and generation parameters

The registered model follows the same pattern used across all AI-Blueprints for consistent deployment.

In [None]:
# Point MLflow at the tracking store and select (or create) the experiment.
# NOTE(review): '/phoenix/mlflow' is a hard-coded absolute path — presumably
# the workspace's local MLflow store; confirm before running elsewhere.
mlflow.set_tracking_uri('/phoenix/mlflow')
mlflow.set_experiment(MLFLOW_EXPERIMENT_NAME)

if assets_verified:
    try:
        # Register the LLM comparison model.
        # NOTE(review): model_finetuned_path receives the bare model name, while
        # verify_model_assets() checked get_fine_tuned_models_dir() / name —
        # confirm that register_llm_comparison_model resolves the name itself.
        register_llm_comparison_model(
            model_base_path=BASE_MODEL,
            model_finetuned_path=FINE_TUNED_MODEL_NAME,
            experiment=MLFLOW_EXPERIMENT_NAME,
            run_name=MODEL_SERVICE_RUN_NAME,
            registry_name=MODEL_SERVICE_NAME,
            config_path=CONFIG_PATH
        )
    except Exception as e:
        # exc_info=True appends the full traceback to the log record, so the
        # "error details above" mentioned in the next message actually exist.
        # The failure is deliberately not re-raised: the notebook keeps running.
        logger.error(f"❌ Failed to register comparison model: {e}", exc_info=True)
        logger.info("Please check the error details above and ensure all dependencies are installed")
    else:
        # Only reached when registration raised nothing
        logger.info("✅ LLM comparison model registered successfully!")
        logger.info(f"Model name: {MODEL_SERVICE_NAME}")
        logger.info(f"Experiment: {MLFLOW_EXPERIMENT_NAME}")

else:
    logger.error("❌ Cannot register model - required assets not found")
    logger.info("Please run the run-workflow.ipynb notebook first to create the fine-tuned model")

## Usage Instructions

Once the model is registered, you can use it through the MLflow model serving interface. Here's how to interact with the registered model:

### Input Format
The model expects a pandas DataFrame with the following columns:
- `prompt` (string): The text prompt to generate from
- `use_finetuning` (boolean): Whether to use the fine-tuned model (True) or base model (False)
- `max_tokens` (integer, optional): Maximum number of tokens to generate (default: 128)

### Example Usage
```python
import pandas as pd
import mlflow

# Load the registered model
model = mlflow.pyfunc.load_model(f"models:/{MODEL_SERVICE_NAME}/latest")

# Create input data
input_data = pd.DataFrame({
    "prompt": ["Explain the importance of sustainable agriculture."],
    "use_finetuning": [True],  # Use fine-tuned model
    "max_tokens": [200]
})

# Generate response
response = model.predict(input_data)
print(response["response"].iloc[0])
```

### Comparison Mode
You can easily compare outputs by running the same prompt with different `use_finetuning` values:

```python
# Compare base vs fine-tuned
prompts = ["Your test prompt here"]

for use_ft in [False, True]:
    input_data = pd.DataFrame({
        "prompt": prompts,
        "use_finetuning": [use_ft],
        "max_tokens": [150]
    })
    response = model.predict(input_data)
    model_type = "Fine-tuned" if use_ft else "Base"
    print(f"{model_type} Model: {response['response'].iloc[0]}")
```

In [None]:
# Total wall-clock duration since `start_time` was captured at the top of the notebook
end_time: float = time.time()
elapsed_time: float = end_time - start_time

# Split the duration into whole minutes and the remaining seconds in one step
minutes_part, elapsed_seconds = divmod(elapsed_time, 60)
elapsed_minutes: int = int(minutes_part)

logger.info(f"⏱️ Total execution time: {elapsed_minutes}m {elapsed_seconds:.2f}s")
logger.info("✅ Model registration notebook execution completed successfully.")

Built with ❤️ using [**HP AI Studio**](https://hp.com/ai-studio).