In [82]:
import pandas as pd
import numpy as np
import mlflow
import mlflow.models
from mlflow.models import infer_signature
import mlflow.sklearn
import os
import warnings
import logging
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import recall_score, f1_score, accuracy_score, precision_score
from urllib.parse import urlparse

logging.basicConfig(level=logging.WARN)
logger = logging.getLogger(__name__)

In [83]:
def eval_metrics(actual, pred):
    recall = recall_score(y_test, y_pred, average= 'weighted'),
    f1 = f1_score(y_test, y_pred, average= 'weighted'),
    accuracy = accuracy_score(y_test, y_pred),
    precision = precision_score(y_test, y_pred, average= 'weighted')
    
    return recall, f1, accuracy, precision

#average: Literal['micro', 'macro', 'samples', 'weighted', 'binary'

In [84]:
if __name__ == "__main__":
    
    np.random.seed(40)
    
    try:
        iris = load_iris()
        X = pd.DataFrame(data = iris.data, columns= iris.feature_names)
        y = iris.target
        
        # Split the data into training and test sets
        X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42)
        print(X.shape)
         
    except Exception as e:
        logger.exception(
            "Unable to download training & test CSV, check your internet connection. Error: %s", e)
    
    # set tracking server
    
    mlflow.set_tracking_uri("http://127.0.0.1:8080")
    
    # create a new experiment

    mlflow.set_experiment("MLflow_iris")
    
    if mlflow.active_run():
       mlflow.end_run()
    
    with mlflow.start_run(nested= True):    
    # Define the model hyperparameters
        params = {
        "solver": "lbfgs",
        "max_iter": 1000,
        "multi_class": "auto",
        "random_state": 8888}

    # Train the model
        lr = LogisticRegression(**params)
        lr.fit(X_train, y_train)

    # Predict
        y_pred = lr.predict(X_test)

    # Eval and log
        recall, f1, accuracy, precision = eval_metrics(y_test, y_pred)
        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred, average='weighted')
        recall = recall_score(y_test, y_pred, average= 'weighted')
        f1 = f1_score(y_test, y_pred, average= 'weighted')

        mlflow.log_params(params)
        mlflow.log_metrics({
        'recall': recall,
        'f1': f1,
        'accuracy': accuracy,
        'precision': precision })

        signature = mlflow.models.infer_signature(X_train, lr.predict(X_train))
    
        print(f"  recall:  {recall}")
        print(f"  f1: {f1}")
        print(f"  accuracy:  {accuracy}") 
        print(f"  precision:  {precision}")
        
   

(150, 4)
  recall:  1.0
  f1: 1.0
  accuracy:  1.0
  precision:  1.0
🏃 View run rumbling-hawk-909 at: http://127.0.0.1:8080/#/experiments/228836478271870438/runs/47e221c458324ec09cb079143af594b2
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/228836478271870438


## Model Logging/autologging

## Parameters explained:
Parameter:	Description

sk_model:	The trained sklearn model you want to save (e.g. LogisticRegression())

artifact_path:	The path inside the run's artifact store to save the model ("model" is typical)

registered_model_name:	(Optional) If you want to register the model to MLflow Model Registry, give a name here

signature:	(Optional) Schema of inputs/outputs — helps validate during serving (use mlflow.models.infer_signature())

input_example:	(Optional) A sample input — helps document and validate how to call the model

conda_env or pip_requirements:	(Optional) Specify dependencies, but usually MLflow handles this automatically

In [87]:
model_info = mlflow.sklearn.log_model(
    sk_model=lr,
    signature= signature,
    artifact_path='MLflow_iris',
    input_example=X_train,
    registered_model_name='tracking_iris'
)

mlflow.log_params(params)
mlflow.log_metrics({
        'recall': recall,
        'f1': f1,
        'accuracy': accuracy,
        'precision': precision })

## registers model info in model artifacts.


if mlflow.active_run():
       mlflow.end_run()
       
print(model_info.signature)

Registered model 'tracking_iris' already exists. Creating a new version of this model...
2025/04/07 19:55:12 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: tracking_iris, version 8


🏃 View run bouncy-asp-355 at: http://127.0.0.1:8080/#/experiments/228836478271870438/runs/302e3d2620dc47339a3188be9e7106ab
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/228836478271870438
inputs: 
  ['sepal length (cm)': double (required), 'sepal width (cm)': double (required), 'petal length (cm)': double (required), 'petal width (cm)': double (required)]
outputs: 
  [Tensor('int64', (-1,))]
params: 
  None



Created version '8' of model 'tracking_iris'.


In [86]:
#  Load the model as a Python Function (pyfunc) and use it for inference

loaded_model= mlflow.pyfunc.load_model(model_info.model_uri)

predictions = loaded_model.predict(X_test)


iris_feature_names = iris.feature_names

result = pd.DataFrame(X_test, columns=iris_feature_names)
result["actual_class"] = y_test
result["predicted_class"] = predictions

result[:2]

Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),actual_class,predicted_class
73,6.1,2.8,4.7,1.2,1,1
18,5.7,3.8,1.7,0.3,0,0


In [81]:
run = mlflow.get_run("f7912e51367c48c2b4594b35d27c80f9")
print(run.data.params)
print(run.data.metrics)
print(run.info.artifact_uri)

{}
{}
mlflow-artifacts:/228836478271870438/f7912e51367c48c2b4594b35d27c80f9/artifacts


# What happens under the hood

MLflow:

1.Serializes the model (usually with cloudpickle)


2. Stores it in the run’s artifacts/ folder (e.g. under artifacts/model/)


3. Saves a MLmodel file that contains metadata, including:

    a.  What loader to use (python_function)


    b.  What environment the model needs


    c.  Signature and input example (if given)
    

    d.   Optionally, adds the model to the Model Registry (if registered_model_name is given)

## MLflow artifacts

mlruns/
├── <experiment_id>/

│   └── <run_id>/

│       └── artifacts/

│           ├── model.pkl/

│           ├── confusion_matrix.png


│           └── transformed_data.csv


In [None]:
# How to Log Artifacts (code)-- don't run
import mlflow
import pandas as pd

with mlflow.start_run():
    # Create a sample CSV
    df = pd.DataFrame({'a': [1, 2], 'b': [3, 4]})
    df.to_csv("sample_data.csv", index=False)
    
    # Log as artifact
    mlflow.log_artifact("sample_data.csv")

# This will upload sample_data.csv to the run's artifact folder


# Log a file under a subdirectory

mlflow.log_artifact("sample_data.csv", artifact_path="data")


# Log an entire folder

import os

# Create a directory with multiple files

os.makedirs("outputs", exist_ok=True)
with open("outputs/info.txt", "w") as f:
    f.write("This is some metadata.")

mlflow.log_artifacts("outputs")  # Notice the plural: log_artifacts()


#  Log images, like plots

import matplotlib.pyplot as plt

plt.plot([1, 2, 3], [4, 5, 6])
plt.title("Simple Plot")
plt.savefig("plot.png")
mlflow.log_artifact("plot.png", artifact_path="plots")


#  How to Retrieve Artifacts Later

run_id = mlflow.active_run().info.run_id
local_path = mlflow.artifacts.download_artifacts(run_id=run_id, path="data/sample_data.csv")
print("Downloaded to:", local_path)


## MLflow model signature

Summary

Feature	Description

✅ What	Schema of input/output for the model

📍 How	Use infer_signature() or define manually

🔒 Why	Helps with validation, deployment, documentation

🚀 Where	Passed in log_model(), saved in MLmodel file