# About this notebook
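This notebook demonstrates how to track machine learning experiments with MLflow, using scikit-learn's classic Iris dataset. It logs parameters, metrics, models, and a confusion-matrix plot, and expects an MLflow tracking server to be running at http://127.0.0.1:5000/.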

# Import libraries and prepare data

In [None]:
import mlflow
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Prepare dataset

In [None]:
# Load the Iris bunch and pull out the feature matrix and the class labels.
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Hold out 30% of the samples for testing; the fixed seed keeps the split
# reproducible across notebook runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)


# Track experiment

In [None]:
# Point MLflow at the local tracking server; the runs started below are
# recorded there.
mlflow.set_tracking_uri("http://127.0.0.1:5000/")

with mlflow.start_run(run_name="LogReg_Iris"):

    # Hyperparameters
    penalty = 'l2'
    C = 1.0
    solver = 'lbfgs'
    max_iter = 200

    # A single dict feeds both the tracking server and the estimator, so the
    # logged configuration cannot drift from the one actually trained.
    params = {
        "penalty": penalty,
        "C": C,
        "solver": solver,
        "max_iter": max_iter,
    }
    mlflow.log_params(params)

    # Train model. `multi_class` is deliberately not passed: 'auto' is the
    # default behaviour, and scikit-learn 1.5 deprecated the argument, so
    # passing it emits a FutureWarning and will fail once it is removed.
    model = LogisticRegression(**params)
    model.fit(X_train, y_train)

    # Predict on the held-out set and calculate accuracy
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)

    # Log metrics
    mlflow.log_metric("accuracy", acc)

    # Log the fitted model as a run artifact named "model"
    mlflow.sklearn.log_model(model, "model")

    print(f"Logged model with accuracy: {acc:.4f}")


# Compare runs

In [None]:
# Regularisation strengths to compare; each value gets its own MLflow run.
C_values = [0.01, 0.1, 1, 10]

for C in C_values:
    with mlflow.start_run(run_name=f"LogReg_C={C}"):

        # Log the full hyperparameter set (not only C) before fitting, so
        # every run exposes the same parameter columns when runs are compared
        # and the configuration is recorded even if training fails.
        params = {
            "penalty": 'l2',
            "C": C,
            "solver": 'lbfgs',
            "max_iter": 200,
        }
        mlflow.log_params(params)

        # Model setup. `multi_class` is deliberately not passed: 'auto' is
        # the default behaviour, and scikit-learn 1.5 deprecated the argument.
        model = LogisticRegression(**params)
        model.fit(X_train, y_train)

        # Prediction & accuracy on the held-out set
        y_pred = model.predict(X_test)
        acc = accuracy_score(y_test, y_pred)

        # Log metric
        mlflow.log_metric("accuracy", acc)

        # Log model
        mlflow.sklearn.log_model(model, "model")

        print(f"Run with C={C} logged. Accuracy: {acc:.4f}")


# Log the confusion matrix plot

In [None]:
# NOTE(review): this value is hard-coded rather than selected from the
# comparison runs; update it by hand if a different C should be plotted.
best_C = 1.0

with mlflow.start_run(run_name=f"LogReg_C={best_C}_ConfMatrix"):

    # Log the full hyperparameter set so this run exposes the same parameter
    # columns as the other runs when they are compared.
    params = {
        "penalty": 'l2',
        "C": best_C,
        "solver": 'lbfgs',
        "max_iter": 200,
    }
    mlflow.log_params(params)

    # `multi_class` is deliberately not passed: 'auto' is the default
    # behaviour, and scikit-learn 1.5 deprecated the argument.
    model = LogisticRegression(**params)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Log accuracy on the held-out set
    acc = accuracy_score(y_test, y_pred)
    mlflow.log_metric("accuracy", acc)

    # Confusion matrix drawn on an explicit figure/axes pair, so the exact
    # figure can be closed afterwards regardless of pyplot's "current" figure.
    cm = confusion_matrix(y_test, y_pred)
    fig, ax = plt.subplots(figsize=(6, 4))
    try:
        sns.heatmap(
            cm,
            annot=True,
            fmt="d",
            cmap="Blues",
            xticklabels=iris.target_names,
            yticklabels=iris.target_names,
            ax=ax,
        )
        ax.set_xlabel("Predicted Label")
        ax.set_ylabel("True Label")
        ax.set_title(f"Confusion Matrix (C={best_C})")

        # Save the figure to the working directory and log it as an artifact
        plot_path = "confusion_matrix.png"
        fig.savefig(plot_path)
        mlflow.log_artifact(plot_path)
    finally:
        # Release the figure even if plotting, saving, or uploading fails.
        plt.close(fig)

    # Log model
    mlflow.sklearn.log_model(model, "model")

    print(f"Confusion matrix logged for model with C={best_C}. Accuracy: {acc:.4f}")
