using a simple logistic regression model on the Iris dataset, where we log the hyperparameters, metrics (accuracy over epochs), and save the trained model as an artifact using MLflow Tracking.

In [33]:
mlflow.set_tracking_uri("http://localhost:5000")


In [34]:
# Import necessary libraries
import mlflow
import mlflow.sklearn
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import os

In [35]:
# Load the Iris dataset
iris = datasets.load_iris()
X = iris.data
y = iris.target

In [36]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [37]:
# Set hyperparameters for Logistic Regression
learning_rate = 0.01  # In LogisticRegression, this can be interpreted as the inverse of regularization strength
epochs = 10
penalty = 'l2'

In [39]:
# Start an MLflow run
with mlflow.start_run() as run:
    # Log hyperparameters
    mlflow.log_param("learning_rate", learning_rate)
    mlflow.log_param("epochs", epochs)
    mlflow.log_param("penalty", penalty)

    # Initialize and train the model
    model = LogisticRegression(penalty=penalty, solver='lbfgs', max_iter=1, warm_start=True)

    # Train over multiple "epochs" (re-fitting the model)
    for epoch in range(epochs):
        model.fit(X_train, y_train)

        # Predict and calculate accuracy
        y_pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)

        # Log the accuracy as a metric
        mlflow.log_metric("accuracy", accuracy, step=epoch)
        print(f"Epoch {epoch+1}/{epochs} - Accuracy: {accuracy:.4f}")

    # Save the model as an artifact
    model_filename = "logistic_regression_model.pkl"
    mlflow.sklearn.save_model(model, model_filename)

    # Log the saved model as an artifact
    mlflow.log_artifact(model_filename)

    # Save and log an additional artifact (like a text file)
    with open("output.txt", "w") as f:
        f.write("Logistic Regression model for Iris dataset.")
    mlflow.log_artifact("output.txt")

    print("Run complete. Check MLflow UI for details.")

# Instructions to view the results in MLflow UI:
# After running the script, type "mlflow ui" in your terminal to start the UI, and navigate to http://localhost:5000


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Epoch 1/10 - Accuracy: 0.3667
Epoch 2/10 - Accuracy: 0.6333
Epoch 3/10 - Accuracy: 0.7000
Epoch 4/10 - Accuracy: 0.6333
Epoch 5/10 - Accuracy: 0.7000
Epoch 6/10 - Accuracy: 0.6333
Epoch 7/10 - Accuracy: 0.7000


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Epoch 8/10 - Accuracy: 0.6667
Epoch 9/10 - Accuracy: 0.7000
Epoch 10/10 - Accuracy: 0.8333


2024/11/03 12:45:09 INFO mlflow.tracking._tracking_service.client: 🏃 View run delicate-midge-977 at: http://localhost:5000/#/experiments/0/runs/f9b4403d87ad46ca923c7fc951360e9e.
2024/11/03 12:45:09 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/0.


Run complete. Check MLflow UI for details.


In [55]:
mlflow.end_run()

In [56]:
mlflow.set_experiment("my_experiment_name2")

2024/11/03 14:21:10 INFO mlflow.tracking.fluent: Experiment with name 'my_experiment_name2' does not exist. Creating a new experiment.


<Experiment: artifact_location='mlflow-artifacts:/984714799679988408', creation_time=1730632870807, experiment_id='984714799679988408', last_update_time=1730632870807, lifecycle_stage='active', name='my_experiment_name2', tags={}>

In [62]:
import mlflow
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Load dataset
data = datasets.load_iris()
x = data.data
y = data.target

# Split dataset into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)

# Create and train the model
model = LogisticRegression(max_iter=200)  # Ensure max_iter is set high enough for convergence
model.fit(x_train, y_train)

# Make predictions
y_pred = model.predict(x_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Model Accuracy:", accuracy)


with mlflow.start_run():
    mlflow.log_param("model_type", "LogisticRegression")
    mlflow.log_param("test_size", 0.3)
    mlflow.log_param("random_state", 42)
    mlflow.log_metric("accuracy", accuracy)
    mlflow.sklearn.log_model(model, "model")



Model Accuracy: 1.0


2024/11/03 14:35:50 INFO mlflow.tracking._tracking_service.client: 🏃 View run marvelous-hog-147 at: http://localhost:5000/#/experiments/984714799679988408/runs/4fe8b66317c14e01b85401f1ca8ce45a.
2024/11/03 14:35:50 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/984714799679988408.


In [60]:
import mlflow
logged_model = 'runs:/845c9f43eb624edfa3b12e6d514392be/model'

# Load model as a PyFuncModel.
loaded_model = mlflow.pyfunc.load_model(logged_model)

# Predict on a Pandas DataFrame.
import pandas as pd
loaded_model.predict(x_test)

Downloading artifacts: 100%|██████████| 5/5 [00:02<00:00,  2.29it/s]


array([1, 0, 2, 1, 1, 0, 1, 2, 1, 1, 2, 0, 0, 0, 0, 1, 2, 1, 1, 2, 0, 2,
       0, 2, 2, 2, 2, 2, 0, 0, 0, 0, 1, 0, 0, 2, 1, 0, 0, 0, 2, 1, 1, 0,
       0, 1, 2, 2, 1, 2, 1, 2, 1, 0, 2, 1, 0, 0, 0, 1, 2, 0, 0, 0, 1, 0,
       1, 2, 0, 1, 2, 0, 2, 2, 1, 1, 2, 1, 0, 1, 2, 0, 0, 1, 2, 0, 2, 0,
       0, 2, 1, 2, 2, 2, 2, 1, 0, 0, 1, 2, 0, 0, 0, 1, 2, 0, 2, 2, 0, 1,
       1, 2, 1, 2, 0, 2, 1, 2, 1, 1])