In [None]:
import pandas as pd
import numpy as np
import joblib
import mlflow
import mlflow.xgboost
import xgboost as xgb

In [None]:
df = pd.read_parquet('/home/sagemaker-user/mlops-sandbox-repo/data_sample.parquet')

In [None]:
target_col = 'target' 
X = df.drop(columns=[target_col])
y = df[target_col]

In [None]:
mlflow.set_tracking_uri("arn:aws:sagemaker:us-east-2:168264144360:mlflow-tracking-server/mlflow-tracker")

In [None]:
model = joblib.load("xgb_model/model.pkl")

In [None]:
from mlflow.models import infer_signature
import mlflow.xgboost


X_sample = X.iloc[0:1]
y_sample = model.predict(X_sample)
signature = infer_signature(X_sample, y_sample)


mlflow.set_experiment("Model_reg_exp")

with mlflow.start_run() as run:
    mlflow.xgboost.log_model(
        xgb_model=model,
        artifact_path="xgb-model",
        signature=signature
    )
    model_uri = f"runs:/{run.info.run_id}/xgb-model"
    
    # 🔽 This is the key step
    mv = mlflow.register_model(
        model_uri=model_uri,
        name="MyXGBoostModel"
    )

    print(f"Model registered as: {mv.name}, version: {mv.version}")


In [None]:
import mlflow.xgboost

# Load by name and version
load_model = mlflow.xgboost.load_model("models:/MyXGBoostModel/1")

load_model.predict(X.iloc[1:2])


In [5]:
from pydantic import BaseModel, model_validator
from typing import Optional
import mlflow

class ModelConfig(BaseModel):
    """
    Configuration model for loading an MLflow model from the Model Registry.

    Attributes:
        model_name (str): The registered name of the model in MLflow.
        version (Optional[str]): The specific version of the model to load. Mutually exclusive with `stage`.
        stage (Optional[str]): The stage of the model to load (e.g., 'Production', 'Staging'). Mutually exclusive with `version`.
        tracking_uri (Optional[str]): Optional URI for the MLflow tracking server. Defaults to local if not provided.
    """

    model_name: str
    version: Optional[str] = None
    stage: Optional[str] = None
    tracking_uri: Optional[str] = None

    @model_validator(mode="after")
    def validate_stage_or_version(self):
        """
        Validates that either 'stage' or 'version' is provided (but not both).
        Defaults to 'Production' stage if neither is specified.
        """
        if self.version and self.stage:
            raise ValueError("Specify only one of 'version' or 'stage', not both.")
        if not self.version and not self.stage:
            self.stage = "Production"
        return self


def load_model_from_registry(config: ModelConfig):
    """
    Loads a model from the MLflow Model Registry based on the provided configuration.

    Args:
        config (ModelConfig): A validated configuration object containing model name,
                              version or stage, and optional tracking URI.

    Returns:
        mlflow.pyfunc.PyFuncModel: The loaded MLflow model, ready for inference.

    Raises:
        RuntimeError: If the model cannot be loaded from the registry.
        ValueError: If both 'stage' and 'version' are provided in the configuration.

    Example:
        >>> config = ModelConfig(model_name="HeartDiseaseModel", stage="Production")
        >>> model = load_model_from_registry(config)
        >>> model.predict(pd.DataFrame([[1, 2, 3, 4]]))
    """
    if config.tracking_uri:
        mlflow.set_tracking_uri(config.tracking_uri)

    model_uri = f"models:/{config.model_name}/{config.stage or config.version}"
    print(f"[INFO] Loading model from: {model_uri}")

    try:
        model = mlflow.pyfunc.load_model(model_uri)
    except Exception as e:
        raise RuntimeError(f"Failed to load model from {model_uri}: {e}")

    return model


In [11]:
import yaml

# Load the YAML file and access the 'mlflow' section
with open("config.yaml", "r") as f:
    full_config = yaml.safe_load(f)

mlflow_config = full_config.get("mlflow", {})

# Pass the inner config to Pydantic
config = ModelConfig(**mlflow_config)

# Load the model
model = load_model_from_registry(config)

[INFO] Loading model from: models:/MyXGBoostModel/1


Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

In [7]:
import yaml

# ✅ Step 1: Read values from config.yaml
with open("config.yaml", "r") as f:
    config = yaml.safe_load(f)

mlflow_cfg = config["mlflow"]
model_name = mlflow_cfg["model_name"]
stage = mlflow_cfg.get("stage")
version = mlflow_cfg.get("version")
tracking_uri = mlflow_cfg.get("tracking_uri")

# Step 2: Load the model using the function
model = load_model_from_registry(
    model_name=model_name,
    #stage=stage,
    version=version,  # We can either use stage or version
    tracking_uri=tracking_uri
)

TypeError: load_model_from_registry() got an unexpected keyword argument 'model_name'

In [None]:
loaded_model = load_model_from_registry("MyXGBoostModel", "1", tracking_uri="arn:aws:sagemaker:us-east-2:168264144360:mlflow-tracking-server/mlflow-tracker")

In [4]:
model.predict(X.iloc[1:2])

NameError: name 'X' is not defined