In [36]:
# Import required libraries
import os

import mlflow
import mlflow.sklearn
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Load Iris dataset
# This dataset contains 4 features and a target variable with 3 classes of flowers.
data = load_iris()
X = pd.DataFrame(data.data, columns=data.feature_names)  # Features, one named column each
y = data.target  # Target variable (class labels)

# Split the data into training and testing sets (80% train, 20% test)
# The fixed random_state keeps the split identical across runs, so every
# solver is evaluated on the same held-out rows.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define solvers to compare for Logistic Regression
solvers = ["lbfgs", "liblinear", "sag", "saga", "newton-cg"]

# Set up MLflow for experiment tracking
# The tracking server must already be listening at this address; otherwise
# set_experiment() fails with a connection error. A local server can be started
# with `mlflow server --host 127.0.0.1 --port 5000`.
# MLFLOW_TRACKING_URI, when set to a non-empty value, overrides the local
# default so the script can target another server without being edited.
tracking_uri = os.environ.get("MLFLOW_TRACKING_URI") or "http://127.0.0.1:5000"
mlflow.set_tracking_uri(tracking_uri)
mlflow.set_experiment("Solver Comparison for Logistic Regression")

# Iterate over each solver and train a Logistic Regression model
# Single source of truth for the iteration cap: the same value configures the
# estimator and is logged to MLflow, so the two cannot drift apart.
MAX_ITER = 1000

for solver in solvers:
    with mlflow.start_run():  # One MLflow run per solver; closed when the block exits
        print(f"Training with solver: {solver}")

        # Build a pipeline for scaling and model training.
        # `multi_class` is deliberately not passed: scikit-learn 1.5+ emits a
        # FutureWarning for the explicit argument and plans to remove it, while
        # omitting it selects the same "auto" behaviour.
        # NOTE(review): recent scikit-learn releases also deprecate "liblinear"
        # for targets with more than two classes - confirm against the
        # installed version.
        pipeline = Pipeline([
            ("scaler", StandardScaler()),  # Standardize features
            ("model", LogisticRegression(solver=solver, max_iter=MAX_ITER, random_state=42)),
        ])

        # Train the pipeline on the training data
        pipeline.fit(X_train, y_train)

        # Predict on the test set and calculate evaluation metrics
        y_pred = pipeline.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)  # Classification accuracy
        f1 = f1_score(y_test, y_pred, average="weighted")  # Weighted F1 score

        print(f"Accuracy: {accuracy}, F1 Score: {f1}")

        # Log the hyperparameters that distinguish this run
        mlflow.log_params({"solver": solver, "max_iter": MAX_ITER})

        # Log metrics
        mlflow.log_metrics({"accuracy": accuracy, "f1_score": f1})

        # Log the fitted pipeline (scaler + model) and register it under a
        # solver-specific name so each variant is versioned separately
        mlflow.sklearn.log_model(
            sk_model=pipeline,
            artifact_path=f"logistic_regression_{solver}",
            registered_model_name=f"logistic_regression_solver_{solver}",
        )

        print(f"Model with solver '{solver}' logged successfully.\n")


MlflowException: API request to http://127.0.0.1:5000/api/2.0/mlflow/experiments/get-by-name failed with exception HTTPConnectionPool(host='127.0.0.1', port=5000): Max retries exceeded with url: /api/2.0/mlflow/experiments/get-by-name?experiment_name=Solver+Comparison+for+Logistic+Regression (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x00000197F9BDA080>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))