In [1]:
import mlflow

In [None]:
First, I need a model to deploy.

import os
from typing import Any, Optional

# from custom_code import iris_classes
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

import mlflow
from mlflow.models import infer_signature

# Species names, positioned so that list index == numeric class label.
flower_classes = ["setosa", "versicolor", "virginica"]


def iris_classes(preds):
    """Translate numeric class labels into iris species names.

    Each element of ``preds`` is used as an index into ``flower_classes``.

    Args:
        preds: Iterable of integer-like class labels.

    Returns:
        A list of species-name strings, in the same order as ``preds``.
    """
    return [flower_classes[class_index] for class_index in preds]


class CustomPredict(mlflow.pyfunc.PythonModel):
    """Pyfunc wrapper that post-processes a logged sklearn model's predictions.

    The wrapped model is loaded from the ``custom_model`` artifact and its
    predictions are passed through ``iris_classes`` before being returned.
    """

    def load_context(self, context):
        """Load the sklearn model referenced by the ``custom_model`` artifact."""
        wrapped_model_uri = context.artifacts["custom_model"]
        self.model = mlflow.sklearn.load_model(wrapped_model_uri)

    def predict(self, context, model_input, params: Optional[dict[str, Any]] = None):
        """Predict with the wrapped model and convert the result via ``iris_classes``.

        ``context`` and ``params`` are accepted for interface compatibility
        but are not used here.
        """
        class_indices = self.model.predict(model_input)
        return iris_classes(class_indices)


# Train a baseline classifier on the iris dataset (features as a DataFrame).
X, y = load_iris(return_X_y=True, as_frame=True)
params = {"C": 1.0, "random_state": 42}
classifier = LogisticRegression(**params).fit(X, y)

predictions = classifier.predict(X)
# Signature of the plain sklearn model: iris features in, numeric class labels out.
signature = infer_signature(X, predictions)
# The custom pyfunc model returns species names (see iris_classes), not numeric
# labels, so its output schema must be inferred from the converted predictions.
custom_signature = infer_signature(X, iris_classes(predictions))

with mlflow.start_run(run_name="test_pyfunc") as run:
    model_info = mlflow.sklearn.log_model(
        sk_model=classifier, artifact_path="model", signature=signature
    )

    # Start a child run to create the custom pyfunc model.
    with mlflow.start_run(run_name="test_custom_model", nested=True):
        # `run` is the outer "test_pyfunc" run; the nested run is not bound to a name.
        print(f"Pyfunc run ID: {run.info.run_id}")
        # Log the custom model. It wraps the sklearn model logged above, which is
        # passed in as the "custom_model" artifact.
        mlflow.pyfunc.log_model(
            artifact_path="artifacts",
            code_paths=[os.getcwd()],
            artifacts={"custom_model": model_info.model_uri},
            python_model=CustomPredict(),
            signature=custom_signature,
        )


In [None]:
# Example request (`input`) and response (`output`) payloads for a chat model.
# Both follow the OpenAI chat-completion layout.
# NOTE: the name `input` shadows Python's builtin `input()` once this cell has run.
input = {
    "messages": [
        {"role": "user", "content": "What is MLflow?"},
    ],
    "max_tokens": 25,
}
output = {
    "choices": [
        {
            "index": 0,
            "message": {
                "role": "assistant",
                "content": (
                    "MLflow is an open-source platform for machine learning (ML) "
                    "and artificial intelligence (AI). It's designed to manage,"
                ),
            },
            "finish_reason": "stop",
        },
    ],
    "model": "llama3.2:1b",
    "object": "chat.completion",
    "created": 1729190863,
}


In [2]:
# simple chat model

from mlflow.pyfunc import ChatModel
from mlflow.types.llm import ChatMessage, ChatCompletionResponse, ChatChoice, ChatParams


class MySimpleChatModel(ChatModel):
    """Minimal ChatModel that answers every request with the same canned reply."""

    def __init__(self):
        # Reported in the ``model`` field of every response.
        self.model_name = "lucag-first_chat_model-v1"

    def load_context(self, context):
        """No-op: this model has nothing to load."""

    def predict(
        self, context, messages: list[ChatMessage], params: ChatParams
    ) -> ChatCompletionResponse:
        """Return a fixed assistant message.

        ``context``, ``messages`` and ``params`` are accepted but not used.
        """
        canned_reply = ChatMessage(role="assistant", content="Beautiful day today!")
        only_choice = ChatChoice(index=0, message=canned_reply, finish_reason="stop")
        # `created` is deliberately not passed to the response constructor.
        return ChatCompletionResponse(
            choices=[only_choice],
            model=self.model_name,
            object="chat.completion",
        )


my_model = MySimpleChatModel()

In [4]:
# Call predict() directly on the in-memory model, with no context, as a quick check.
my_model.predict(
    None,
    [ChatMessage(role="user", content="Hello!")],
    ChatParams(),
)

ChatCompletionResponse(choices=[ChatChoice(message=ChatMessage(role='assistant', content='Beautiful day today!', refusal=None, name=None, tool_calls=None, tool_call_id=None), index=0, finish_reason='stop', logprobs=None)], usage=None, id=None, model='lucag-first_chat_model', object='chat.completion', created=1736858982, custom_outputs=None)

In [5]:
# Log the chat model in a run under the "chatmodel-test" experiment.
mlflow.set_experiment("chatmodel-test")

with mlflow.start_run():
    model_info = mlflow.pyfunc.log_model(
        artifact_path="lucag-first_chat_model",
        python_model=my_model,
        # Example request stored alongside the logged model.
        input_example={"messages": [{"role": "user", "content": "Hello!"}]},
    )

2025/01/14 13:56:49 INFO mlflow.tracking.fluent: Experiment with name 'chatmodel-test' does not exist. Creating a new experiment.
2025/01/14 13:56:49 INFO mlflow.pyfunc: Predicting on input example to validate output


In [None]:
# result = mlflow.register_model(
#     "runs:/d16076a3ec534311817565e6527539c0/sklearn-model", "sk-learn-random-forest-reg"
# )

In [8]:
# Show the attributes available on the returned model info, then the model URI.
print(dir(model_info), model_info.model_uri, sep="\n")

['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__firstlineno__', '__format__', '__ge__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__static_attributes__', '__str__', '__subclasshook__', '__weakref__', '_artifact_path', '_env_vars', '_flavors', '_metadata', '_mlflow_version', '_model_uri', '_model_uuid', '_registered_model_version', '_run_id', '_saved_input_example_info', '_signature', '_signature_dict', '_utc_time_created', 'artifact_path', 'env_vars', 'flavors', 'metadata', 'mlflow_version', 'model_uri', 'model_uuid', 'registered_model_version', 'run_id', 'saved_input_example_info', 'signature', 'signature_dict', 'utc_time_created']
runs:/b9bac7ff6622442ca0863fd2563ab383/lucag-first_chat_model


In [9]:
# Load the logged chat model back as a generic pyfunc model.
loaded_model = mlflow.pyfunc.load_model(model_uri=model_info.model_uri)

In [15]:
# Show the attributes available on the loaded model, then its metadata.
print(dir(loaded_model), loaded_model.metadata, sep="\n")

['_PyFuncModel__model_impl', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__firstlineno__', '__format__', '__ge__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__static_attributes__', '__str__', '__subclasshook__', '__weakref__', '_model_impl', '_model_meta', '_predict', '_predict_fn', '_predict_stream', '_predict_stream_fn', '_try_get_or_generate_prediction_context', 'get_raw_model', 'loader_module', 'metadata', 'model_config', 'predict', 'predict_stream', 'unwrap_python_model']
artifact_path: lucag-first_chat_model
flavors:
  python_function:
    cloudpickle_version: 3.1.0
    code: null
    env:
      conda: conda.yaml
      virtualenv: python_env.yaml
    loader_module: mlflow.pyfunc.loaders.chat_model
    python_model: python_model.pkl
    python_version: 3.13.0
    streamable: tru

In [16]:
# Send an OpenAI-style chat request through the loaded pyfunc model.
result = loaded_model.predict(
    {
        "messages": [
            {"role": "user", "content": "What is MLflow?"},
        ],
        "max_tokens": 25,
    },
)
print(result)

{'choices': [{'message': {'role': 'assistant', 'content': 'Beautiful day today!'}, 'index': 0, 'finish_reason': 'stop'}], 'model': 'lucag-first_chat_model', 'object': 'chat.completion', 'created': 1736859757}


Ok, we have a model now. Let's see how to deploy it.

Let's try this:

`mlflow models serve -m runs:/<run_id>/model -p 5000`

Serve locally with:
```
mlflow models serve -m runs:/b9bac7ff6622442ca0863fd2563ab383/lucag-first_chat_model -p 5000 --env-manager=local
```

Test in PowerShell with:

```
curl http://127.0.0.1:5000/invocations -Method Post -ContentType application/json -Body '{"inputs": {"messages": [{"role": "user", "content": "Tell a joke!"}]}}'
```

In [22]:
import json
import requests

# Request body for the locally served model's /invocations endpoint:
# the chat messages go under "inputs", generation settings under "params".
payload = json.dumps(
    {
        "inputs": {"messages": [{"role": "user", "content": "Tell a joke!"}]},
        "params": {
            "temperature": 0.5,
            "max_tokens": 20,
        },
    }
)
response = requests.post(
    url="http://localhost:5000/invocations",
    data=payload,
    headers={"Content-Type": "application/json"},
    # requests has no default timeout; without one this cell hangs
    # indefinitely if the server is unreachable or stalls.
    timeout=30,
)
# Fail loudly on an HTTP error status instead of trying to parse an error body as a prediction.
response.raise_for_status()
print(response.json())

{'predictions': {'choices': [{'message': {'role': 'assistant', 'content': 'Beautiful day today!'}, 'index': 0, 'finish_reason': 'stop'}], 'model': 'lucag-first_chat_model', 'object': 'chat.completion', 'created': 1736864331}}
