In [None]:
# Realtime Inference: Iris RandomForest via Databricks Model Serving
from databricks.sdk import WorkspaceClient
from databricks.sdk.errors import ResourceAlreadyExists
from databricks.sdk.service.serving import EndpointCoreConfigInput

# Configuration
ENDPOINT_NAME = "iris-rf-realtime-endpoint"
REGISTERED_MODEL = "workspace.iris.iris_rf_classifier"
MODEL_ALIAS = "champion"
WORKLOAD_SIZE = "Small"
SCALE_TO_ZERO = True

w = WorkspaceClient()

served_model_name = "iris_rf_served_model"

# A served model is pinned to a concrete model version, not an alias, so look
# up the version the alias currently points at before building the config.
model_version = w.model_versions.get_by_alias(REGISTERED_MODEL, MODEL_ALIAS).version

# Plain-dict description of the endpoint: one served model receiving 100% of
# the traffic.
config = {
    "served_models": [
        {
            "name": served_model_name,
            "model_name": REGISTERED_MODEL,
            "model_version": str(model_version),
            "workload_size": WORKLOAD_SIZE,
            "scale_to_zero_enabled": SCALE_TO_ZERO,
        }
    ],
    "traffic_config": {
        "routes": [
            {"served_model_name": served_model_name, "traffic_percentage": 100}
        ]
    },
}

# The SDK methods serialise their arguments by calling `.as_dict()` on them,
# so convert the dict into the SDK's typed config object first.
endpoint_config = EndpointCoreConfigInput.from_dict(config)

# Upsert endpoint: create it, and only fall back to a config update when the
# service reports that the name is already taken. Any other failure (auth,
# invalid config, quota, ...) propagates instead of being mistaken for
# "already exists".
try:
    endpoint = w.serving_endpoints.create(name=ENDPOINT_NAME, config=endpoint_config)
    print(f"Created endpoint '{ENDPOINT_NAME}'.")
except ResourceAlreadyExists:
    endpoint = w.serving_endpoints.update_config(
        name=ENDPOINT_NAME,
        served_models=endpoint_config.served_models,
        traffic_config=endpoint_config.traffic_config,
    )
    print(f"Updated endpoint '{ENDPOINT_NAME}'.")


In [None]:
import time
from databricks.sdk.service.serving import EndpointCoreConfigSummaryConfig

# Wait for endpoint to be ready
# NOTE(review): `EndpointCoreConfigSummaryConfig`, imported at the top of this
# cell, is not used by the code below — confirm the name exists in the
# installed SDK, otherwise the import fails before any of this runs.

READY_TIMEOUT_SECONDS = 20 * 60  # give up after 20 minutes instead of polling forever
POLL_INTERVAL_SECONDS = 10
# `state.config_update` values that mean the requested config will never deploy.
FAILED_UPDATE_STATES = {"UPDATE_FAILED", "UPDATE_CANCELED"}

print(f"Waiting for endpoint '{ENDPOINT_NAME}' to be ready...")

start_time = time.monotonic()


def _state_name(value):
    """Return a state value as a plain string, or None when it is unset.

    The SDK exposes endpoint states as Enum members, which never compare equal
    to plain strings; unwrapping `.value` lets the checks below use literals.
    """
    if value is None:
        return None
    return str(getattr(value, "value", value))


def _get_state():
    """Fetch the endpoint and return ``(ready_state, ep)``.

    ``ready_state`` is the string form of ``ep.state.ready`` (for example
    "READY" or "NOT_READY"), or None if the endpoint reports no state.
    """
    ep = w.serving_endpoints.get(ENDPOINT_NAME)
    return _state_name(getattr(ep.state, "ready", None)), ep


while True:
    ready_state, ep = _get_state()
    update_state = _state_name(getattr(ep.state, "config_update", None))
    print(f"State: {ready_state}, config update: {update_state}")

    # A failed or cancelled rollout is terminal regardless of `ready`.
    if update_state in FAILED_UPDATE_STATES:
        break
    # An endpoint that is being updated can report READY while the new config
    # is still rolling out, so also require that no update is in progress.
    if ready_state == "READY" and update_state in (None, "NOT_UPDATING"):
        break
    if time.monotonic() - start_time > READY_TIMEOUT_SECONDS:
        raise TimeoutError(
            f"Endpoint '{ENDPOINT_NAME}' not ready after {READY_TIMEOUT_SECONDS}s. "
            f"Last state: {ready_state}, config update: {update_state}"
        )
    time.sleep(POLL_INTERVAL_SECONDS)

elapsed = int(time.monotonic() - start_time)
print(f"Endpoint state after {elapsed}s: {ready_state}")

if update_state in FAILED_UPDATE_STATES or ready_state != "READY":
    raise RuntimeError(
        f"Endpoint failed to become READY. Last state: {ready_state} "
        f"(config update: {update_state})"
    )

# Show served models summary
summary = w.serving_endpoints.get(ENDPOINT_NAME)
print(summary)


In [None]:
import json
import pandas as pd

# Prepare a sample Iris row (sepal_length, sepal_width, petal_length, petal_width)
# NOTE(review): assumes the served model was trained with exactly these column
# names — TODO confirm against the model signature.
sample = pd.DataFrame([
    {"sepal_length": 5.1, "sepal_width": 3.5, "petal_length": 1.4, "petal_width": 0.2}
])

# Invoke endpoint
# The rows are sent as a list of {column: value} dicts via `dataframe_records`;
# no tensor-style wrapper (name/shape/datatype) is involved in this request.
records = sample.to_dict(orient="records")

invocation = w.serving_endpoints.query(name=ENDPOINT_NAME, dataframe_records=records)
print("Raw response:\n", invocation)

# Best-effort extraction of predictions: prefer the `predictions` attribute of
# the response object, and fall back to a dict lookup if a plain dict was
# returned. Neither lookup can raise, so no exception handling is needed;
# `preds` stays None when the response carries no predictions.
preds = getattr(invocation, "predictions", None)
if preds is None and isinstance(invocation, dict):
    preds = invocation.get("predictions")
print("Predictions:", preds)
