In [1]:
import mlflow

# Direct every subsequent MLflow tracking call in this notebook to the server at
# 127.0.0.1:8080.
# NOTE(review): assumes an MLflow tracking server is already listening on that
# address before the notebook is run — confirm.
mlflow.set_tracking_uri(uri="http://127.0.0.1:8080")

In [2]:
from mlflow.models import infer_signature

import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


# Load the Iris dataset as a feature matrix X and a target vector y
X, y = datasets.load_iris(return_X_y=True)

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split
# reproducible across kernel restarts
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Define the model hyperparameters.
# "multi_class" is deliberately not set: scikit-learn 1.5 deprecated the
# parameter (passing it raises a FutureWarning and it is scheduled for removal).
# The previously passed value "auto" was already the default, so omitting it
# does not change the fitted model.
params = {
    "solver": "lbfgs",
    "max_iter": 1000,
    "random_state": 8888,
}

# Train the model
lr = LogisticRegression(**params)
lr.fit(X_train, y_train)

# Predict on the held-out test set
y_pred = lr.predict(X_test)

# Calculate the test-set accuracy (logged to MLflow in a later cell)
accuracy = accuracy_score(y_test, y_pred)




In [3]:
# Wrap the training features (a NumPy array) in an MLflow dataset object via
# `mlflow.data.from_numpy`, recording where the data came from and naming it
# "iris". Only the features are passed; no targets are attached.
dataset = mlflow.data.from_numpy(
    X_train,
    source="https://scikit-learn.org/stable/auto_examples/datasets/plot_iris_dataset.html",
    name="iris",
)

  return _dataset_source_registry.resolve(


In [4]:
# Select the MLflow experiment that the run below is recorded under
mlflow.set_experiment("MLflow Quickstart")

# Start an MLflow run; `run` is kept so later cells can look the run up by id
with mlflow.start_run() as run:
    # Log the hyperparameters
    mlflow.log_params(params)

    # Log the test-set accuracy metric
    mlflow.log_metric("accuracy", accuracy)

    # Set a tag that we can use to remind ourselves what this run was for
    mlflow.set_tag("Training Info", "Basic LR model for iris data")

    # Log the Dataset to an MLflow run by using the `log_input` API
    mlflow.log_input(dataset, context="training")

    # Infer the model signature from the training features and the model's
    # predictions on them
    signature = infer_signature(X_train, lr.predict(X_train))

    # Log the model and register it under "tracking-quickstart".
    # Only a handful of rows are stored as the input example: it is meant to
    # illustrate a valid model input, not to persist the whole training set
    # inside the model artifacts.
    model_info = mlflow.sklearn.log_model(
        sk_model=lr,
        artifact_path="iris_model",
        signature=signature,
        input_example=X_train[:5],
        registered_model_name="tracking-quickstart",
    )


2024/09/05 10:13:54 INFO mlflow.tracking.fluent: Experiment with name 'MLflow Quickstart' does not exist. Creating a new experiment.


Successfully registered model 'tracking-quickstart'.
2024/09/05 10:13:57 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: tracking-quickstart, version 1
Created version '1' of model 'tracking-quickstart'.


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

2024/09/05 10:13:57 INFO mlflow.tracking._tracking_service.client: 🏃 View run auspicious-dove-176 at: http://127.0.0.1:8080/#/experiments/824395778428720242/runs/51118cba4ffd4d43b9b1e4eb3a7a5dd5.
2024/09/05 10:13:57 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:8080/#/experiments/824395778428720242.


View the Run in the MLflow UI: http://127.0.0.1:8080

In [None]:
# Fetch the stored run record using the id of the run created above
logged_run = mlflow.get_run(run.info.run_id)

# Take the first dataset input recorded on that run
logged_dataset = logged_run.inputs.dataset_inputs[0].dataset

# Show the recorded dataset metadata, one field per output line
print(
    f"Dataset name: {logged_dataset.name}",
    f"Dataset digest: {logged_dataset.digest}",
    f"Dataset profile: {logged_dataset.profile}",
    f"Dataset schema: {logged_dataset.schema}",
    sep="\n",
)

Dataset name: iris
Dataset digest: 7fc7d768
Dataset profile: {"features_shape": [120, 4], "features_size": 480, "features_nbytes": 3840}
Dataset schema: {"mlflow_tensorspec": {"features": "[{\"type\": \"tensor\", \"tensor-spec\": {\"dtype\": \"float64\", \"shape\": [-1, 4]}}]", "targets": null}}
