In [38]:
# Import required libraries
import mlflow
import mlflow.sklearn
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import pandas as pd

# Fetch the Iris data: four feature columns and a three-class flower label.
data = load_iris()
y = data.target  # Class labels
X = pd.DataFrame(data=data.data, columns=data.feature_names)  # Feature table

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Logistic Regression solvers that the training loop will try in turn
solvers = [
    "lbfgs",
    "liblinear",
    "sag",
    "saga",
    "newton-cg",
]

# Point MLflow at the tracking server on the local machine and group
# every run from this comparison under a single experiment.
mlflow.set_tracking_uri("http://127.0.0.1:5000")
mlflow.set_experiment("Solver Comparison for Logistic Regression")

# Hyperparameters shared by every run. They are defined once so that the
# values logged to MLflow are always the values the model was built with.
MAX_ITER = 1000
RANDOM_STATE = 42

# Train, evaluate, and log one scaled Logistic Regression pipeline per solver.
# Each solver gets its own MLflow run and its own registered model name.
for solver in solvers:
    with mlflow.start_run():  # Start a new MLflow run
        print(f"Training with solver: {solver}")

        # Build a pipeline for scaling and model training.
        # `multi_class` is deliberately left at its default: "auto" is the
        # default value, and passing the argument explicitly triggers a
        # FutureWarning on recent scikit-learn releases.
        # NOTE(review): recent scikit-learn releases also appear to deprecate
        # "liblinear" for multiclass targets - confirm against the installed version.
        pipeline = Pipeline([
            ("scaler", StandardScaler()),  # Standardize features
            (
                "model",
                LogisticRegression(
                    solver=solver,
                    max_iter=MAX_ITER,
                    random_state=RANDOM_STATE,
                ),
            ),
        ])

        # Train the pipeline on the training data
        pipeline.fit(X_train, y_train)

        # Predict on the held-out test set and calculate evaluation metrics
        y_pred = pipeline.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)  # Classification accuracy
        f1 = f1_score(y_test, y_pred, average="weighted")  # Weighted F1 score

        print(f"Accuracy: {accuracy}, F1 Score: {f1}")

        # Log the hyperparameters from the same variables used to build the model
        mlflow.log_param("solver", solver)
        mlflow.log_param("max_iter", MAX_ITER)
        mlflow.log_param("random_state", RANDOM_STATE)

        # Log metrics
        mlflow.log_metric("accuracy", accuracy)
        mlflow.log_metric("f1_score", f1)

        # Log the fitted pipeline and register it under a solver-specific name.
        # A few training rows are attached as an input example so the expected
        # input format is stored with the model (MLflow can use the example to
        # infer a model signature).
        mlflow.sklearn.log_model(
            sk_model=pipeline,
            artifact_path=f"logistic_regression_{solver}",
            registered_model_name=f"logistic_regression_solver_{solver}",
            input_example=X_train.iloc[:5],
        )

        print(f"Model with solver '{solver}' logged successfully.\n")


2024/12/16 11:46:35 INFO mlflow.tracking.fluent: Experiment with name 'Solver Comparison for Logistic Regression' does not exist. Creating a new experiment.




Training with solver: lbfgs
Accuracy: 1.0, F1 Score: 1.0


Successfully registered model 'logistic_regression_solver_lbfgs'.
2024/12/16 11:46:45 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: logistic_regression_solver_lbfgs, version 1
Created version '1' of model 'logistic_regression_solver_lbfgs'.


Model with solver 'lbfgs' logged successfully.

🏃 View run hilarious-skink-568 at: http://127.0.0.1:5000/#/experiments/110915975825167327/runs/7f491db601ab4f81b5af1031d7ed663c
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/110915975825167327
Training with solver: liblinear
Accuracy: 0.9666666666666667, F1 Score: 0.9664109121909632


Successfully registered model 'logistic_regression_solver_liblinear'.
2024/12/16 11:46:54 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: logistic_regression_solver_liblinear, version 1
Created version '1' of model 'logistic_regression_solver_liblinear'.


Model with solver 'liblinear' logged successfully.

🏃 View run loud-conch-900 at: http://127.0.0.1:5000/#/experiments/110915975825167327/runs/7b72cae3c3d041ceba34ac3507c46492
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/110915975825167327
Training with solver: sag
Accuracy: 1.0, F1 Score: 1.0


Successfully registered model 'logistic_regression_solver_sag'.
2024/12/16 11:47:04 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: logistic_regression_solver_sag, version 1
Created version '1' of model 'logistic_regression_solver_sag'.


Model with solver 'sag' logged successfully.

🏃 View run valuable-elk-746 at: http://127.0.0.1:5000/#/experiments/110915975825167327/runs/4b22778ec9c94ee999b90c58ad468df3
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/110915975825167327
Training with solver: saga
Accuracy: 1.0, F1 Score: 1.0


Successfully registered model 'logistic_regression_solver_saga'.
2024/12/16 11:47:13 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: logistic_regression_solver_saga, version 1
Created version '1' of model 'logistic_regression_solver_saga'.


Model with solver 'saga' logged successfully.

🏃 View run learned-chimp-931 at: http://127.0.0.1:5000/#/experiments/110915975825167327/runs/1abd30355dd44dc49b47aee20e72e634
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/110915975825167327
Training with solver: newton-cg
Accuracy: 1.0, F1 Score: 1.0


Successfully registered model 'logistic_regression_solver_newton-cg'.
2024/12/16 11:47:22 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: logistic_regression_solver_newton-cg, version 1


Model with solver 'newton-cg' logged successfully.

🏃 View run dashing-hound-651 at: http://127.0.0.1:5000/#/experiments/110915975825167327/runs/2fa923e0aa7d45488a30d00dcd0e1421
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/110915975825167327


Created version '1' of model 'logistic_regression_solver_newton-cg'.
