## Automatic Logging with MLflow Tracking

In [20]:
import mlflow
import mlflow.sklearn
from mlflow.models.signature import ModelSignature
from mlflow.types import Schema, ColSpec
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
import pandas as pd
import numpy


mlflow server --backend-store-uri sqlite:///mlflow.db --default-artifact-root ./mlruns --host 0.0.0.0 --port 8080

--backend-store-uri sqlite:///mlflow.db: Stores runs in a SQLite database.

--default-artifact-root ./mlruns: Stores models and artifacts in mlruns/.

--host 0.0.0.0 --port 8080: Listens on all network interfaces on port 8080, which allows remote access.

In [10]:
# Turn on scikit-learn autologging before any estimator is fitted
mlflow.sklearn.autolog()

# Iris features as a DataFrame with named columns; class labels as an array
iris = load_iris()
X = pd.DataFrame(data=iris.data, columns=iris.feature_names)
y = iris.target

# Hold out 20% of the rows for testing (fixed seed keeps the split repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training split only,
# then apply the same transformation to both splits
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit a default-configured logistic regression on the standardized training data
lr = LogisticRegression()
lr.fit(X_train_scaled, y_train)


2025/03/27 20:45:58 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID 'f5e3472c424b4be8a1474976c9fca8c5', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow


🏃 View run casual-steed-515 at: http://127.0.0.1:8080/#/experiments/517851688068192192/runs/f5e3472c424b4be8a1474976c9fca8c5
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/517851688068192192


LogisticRegression()

In [14]:
# Define input schema: one named column per Iris feature.
# MLflow's "float" is 32-bit and "double" is 64-bit. The standardized features
# are NumPy float64, and schema enforcement will not downcast float64 input
# to a 32-bit "float" column, so the columns are declared as "double".
input_schema = Schema([ColSpec("double", col) for col in X.columns])

# Define output schema: Logistic Regression predicts integer class labels.
# "long" is MLflow's 64-bit integer type; the labels come from iris.target,
# presumably int64 on typical 64-bit setups (check y.dtype to confirm).
output_schema = Schema([ColSpec("long", "predicted_class")])

# Create custom signature
custom_signature = ModelSignature(inputs=input_schema, outputs=output_schema)

# Example input in the form the model was trained on: standardized values
# (not the raw X_train rows), labelled with the signature's column names.
input_example = pd.DataFrame(X_train_scaled[:5], columns=X.columns)

# Set the MLflow tracking URI and the experiment that receives the run
mlflow.set_tracking_uri("http://127.0.0.1:8080")
mlflow.set_experiment('mlflow_demo_diabetes')

# Define model parameters and metrics.
# Parameter values are read from the fitted estimator so the logged values
# cannot drift from what was actually trained.
fitted_params = lr.get_params()
params = {name: fitted_params[name] for name in ("solver", "C")}
metrics = {
    "train_accuracy": lr.score(X_train_scaled, y_train),
    "test_accuracy": lr.score(X_test_scaled, y_test),
}

# Start MLflow run
with mlflow.start_run():
    mlflow.log_params(params)    # Log hyperparameters
    mlflow.log_metrics(metrics)  # Log model performance
    mlflow.set_tag("model_type", "Logistic Regression")  # Set a tag
    mlflow.sklearn.log_model(    # Log the model with the custom signature
        sk_model=lr,
        artifact_path="iris_lr_model",
        signature=custom_signature,   # Custom schema
        input_example=input_example,  # Example input data (scaled)
    )

🏃 View run youthful-stoat-230 at: http://127.0.0.1:8080/#/experiments/517851688068192192/runs/889f9994355e44448589c642286c2e4e
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/517851688068192192




## MLflow params and metrics

In [27]:

# Load latest run details.
# get_experiment_by_name returns None for an unknown name, so the experiment
# is checked before its id is read; otherwise the "not found" branch below
# could never run.
experiment = mlflow.get_experiment_by_name("mlflow_demo_diabetes")

if experiment is None:
    print('No experiment found')
else:
    experiment_id = experiment.experiment_id
    # Newest run first (this is also MLflow's default ordering, made explicit here)
    runs = mlflow.search_runs(
        experiment_ids=[experiment_id],
        order_by=["attributes.start_time DESC"],
    )
    if runs.empty:
        print("No runs Found")
    else:
        # Select the id by column name rather than by column position
        run_id = runs.iloc[0]["run_id"]
        # Fetch the run once and read both params and metrics from it
        run_data = mlflow.get_run(run_id).data
        logged_params = run_data.params
        # Separate name so the `metrics` dict built for logging is not overwritten
        logged_metrics = run_data.metrics
        print("Logged Parameters:", logged_params)
        print("logged Metrics", logged_metrics)


Logged Parameters: {'solver': 'lbfgs', 'C': '1.0'}
logged Metrics {'train_accuracy': 0.9666666666666667, 'test_accuracy': 1.0}
