In [1]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn import datasets
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from mlflow.models import infer_signature
import mlflow

### Model build-up

In [2]:
# data preparation
X, y = datasets.load_iris(return_X_y=True)
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.20)

# Define the model hyperparameters
params = {"penalty":"l2","solver": "lbfgs", "max_iter": 1000, "multi_class": "auto", "random_state": 8888}

def model_training_predict(params):
    # build logregression model
    lr = LogisticRegression(**params)
    # train model
    lr.fit(X_train, y_train)
    # test model
    y_pred = lr.predict(X_test)
    # calculate test accuracy
    accuracy = accuracy_score(y_test, y_pred)

    return lr, y_pred, accuracy

### MLFlow tracking

In [3]:
### MLFLOW tracking
mlflow.set_tracking_uri(uri="http://127.0.0.1:5000")     # ensure mlflow ui is running first

##create a new MLFLOW experiment
mlflow.set_experiment("iris_expt1")

2025/06/11 03:21:53 INFO mlflow.tracking.fluent: Experiment with name 'iris_expt1' does not exist. Creating a new experiment.


<Experiment: artifact_location='mlflow-artifacts:/163835588279346164', creation_time=1749626513956, experiment_id='163835588279346164', last_update_time=1749626513956, lifecycle_stage='active', name='iris_expt1', tags={}>

In [4]:
# 1st run

# Start a new MLflow tracking run
with mlflow.start_run():

    model, y_pred, accuracy = model_training_predict(params)

    # Log all hyperparameters used in training
    mlflow.log_params(params)

    # Log a single metric: accuracy of the model, allows MLflow to track and compare model performance over multiple runs.
    mlflow.log_metric("accuracy", accuracy)

    # Add a descriptive tag to help identify this run in MLflow UI
    mlflow.set_tag("run info", "1st run for iris project")

    # Infer the model signature (input/output schema),helps MLflow know what kind of input/output is expected, 
    # Improves reproducibility and compatibility for deployment
    signature = infer_signature(X_train, y_pred)

    # Log and register the trained scikit-learn model with MLflow
    model_info = mlflow.sklearn.log_model(      # Logs and saves a trained Scikit-learn model to MLflow’s tracking system, 
                                                #   with optional metadata like signatures and registration
        sk_model = model,                       # The trained model object
        artifact_path = "iris_model",           # Folder where the model artifact will be saved  (under mlruns/)
        signature = signature,                  # Specifies expected input/output schema for reproducibility
        input_example = X_train,                # Provides a sample input for inference documentation and testing
        registered_model_name = "iris_project"  # Track this model under the name "iris_project", and register it with a version number
    )

Registered model 'iris_project' already exists. Creating a new version of this model...
2025/06/11 03:21:55 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: iris_project, version 4


🏃 View run monumental-squid-926 at: http://127.0.0.1:5000/#/experiments/163835588279346164/runs/43684ecb2ef9418ba6ccbb1be6cc3cab
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/163835588279346164


Created version '4' of model 'iris_project'.


In [5]:
# 2nd run: change the parameter by taking out penalty

with mlflow.start_run():
    
    params = {"solver": "lbfgs", "max_iter": 1000, "multi_class": "auto", "random_state": 8888}
    model, y_pred, accuracy = model_training_predict(params)

    # log the parameters
    mlflow.log_params(params)

    # log the accuracy metrics
    mlflow.log_metric("accuracy", accuracy)

    # set a tag that we can use to remind what this run was for
    mlflow.set_tag("run info", "1st run for iris project")

    # infer the model signature
    signature = infer_signature(X_train, y_pred)

    model_info = mlflow.sklearn.log_model(
        sk_model = model,
        artifact_path = "iris_model",
        signature = signature,
        input_example = X_train,
        registered_model_name = "iris_project"
    )

Registered model 'iris_project' already exists. Creating a new version of this model...
2025/06/11 03:21:56 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: iris_project, version 5


🏃 View run luxuriant-fly-55 at: http://127.0.0.1:5000/#/experiments/163835588279346164/runs/d4814a3f4d2b4d1d92fd2dc63930917d
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/163835588279346164


Created version '5' of model 'iris_project'.


### MLFlow Validating and Inferencing

In [6]:
# Approach 1: load model with model_uri
import mlflow
from mlflow.models import Model

# since model_info was created, us this line can also return the path where model was saved.
model_uri = model_info.model_uri

# The model is logged with an input example
pyfunc_model = mlflow.pyfunc.load_model(model_uri)
input_data = pyfunc_model.input_example

pyfunc_model.predict(input_data)

  from .autonotebook import tqdm as notebook_tqdm
Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 4861.75it/s]


array([2, 0, 2, 0, 2, 0, 0, 2, 0, 0, 0, 2, 0, 2, 0, 1, 0, 1, 1, 0, 1, 2,
       1, 2, 2, 1, 1, 1, 2, 0, 2, 1, 1, 0, 0, 0, 0, 1, 1, 2, 1, 0, 1, 0,
       2, 2, 0, 1, 1, 1, 2, 1, 2, 2, 0, 1, 0, 1, 0, 0, 2, 1, 1, 2, 2, 0,
       0, 0, 2, 1, 1, 0, 2, 1, 1, 2, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 2, 2,
       2, 2, 0, 0, 1, 1, 2, 1, 1, 1, 2, 2, 1, 2, 0, 0, 2, 0, 1, 0, 1, 2,
       1, 1, 1, 2, 2, 0, 0, 0, 2, 0])

In [7]:
# Approach 2: load model from model repository
model_name = "iris_project"     # use the registered_model_name
model_version = "2"             # If you have multiple versions registered (e.g., v1, v2...), just change for "1", "2", or "3", 
                                # Value "latest" will load the latest version.
model_path = f'models:/{model_name}/{model_version}'
load_model = mlflow.sklearn.load_model(model_path)     # load with mlflow.sklearn, instead of mlflow.pyfunc
load_model.predict(X_test)

Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 470.78it/s]


array([0, 1, 0, 2, 2, 2, 0, 2, 2, 2, 1, 1, 1, 0, 2, 0, 1, 1, 2, 2, 1, 1,
       1, 1, 2, 2, 2, 0, 0, 1])

In [8]:
#run 3:  change solver to "sag", do not reisgter model in codes

with mlflow.start_run():
    
    params = {"solver": "sag", "max_iter": 1000, "multi_class": "auto", "random_state": 8888}
    model, y_pred, accuracy = model_training_predict(params)

    mlflow.log_params(params)

    mlflow.log_metric("accuracy", accuracy)

    mlflow.set_tag("run info", "1st run for iris project")

    signature = infer_signature(X_train, y_pred)

    model_info = mlflow.sklearn.log_model(
        sk_model = model,
        artifact_path = "iris_model",
        signature = signature,
        input_example = X_train,
    )



🏃 View run defiant-bear-408 at: http://127.0.0.1:5000/#/experiments/163835588279346164/runs/a65365a25d714cdfbdfca76bc80c5169
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/163835588279346164
