In [None]:
#import kedro_mlflow

In [None]:
# import mlflow

# mlflow.set_tracking_uri(uri="http://127.0.0.1:8080")


In [5]:
import mlflow
from mlflow.models import infer_signature

import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


# Load the Iris dataset
X, y = datasets.load_iris(return_X_y=True)

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Define the model hyperparameters
params = {
    "solver": "lbfgs",
    "max_iter": 1000,
    "multi_class": "auto",
    "random_state": 8888,
}

# Train the model
lr = LogisticRegression(**params)
lr.fit(X_train, y_train)

# Predict on the test set
y_pred = lr.predict(X_test)

# Calculate metrics
accuracy = accuracy_score(y_test, y_pred)




In [2]:
params, accuracy

({'solver': 'lbfgs',
  'max_iter': 1000,
  'multi_class': 'auto',
  'random_state': 8888},
 1.0)

In [4]:
# Set our tracking server uri for logging
mlflow.set_tracking_uri(uri="http://127.0.0.1:8080")

# Create a new MLflow Experiment
mlflow.set_experiment("MLflow Quickstart")

# Start an MLflow run
with mlflow.start_run():
    # Log the hyperparameters
    mlflow.log_params(params)

    # Log the loss metric
    mlflow.log_metric("accuracy", accuracy)

    # Set a tag that we can use to remind ourselves what this run was for
    mlflow.set_tag("Training Info", "Basic LR model for iris data")

    # Infer the model signature
    signature = infer_signature(X_train, lr.predict(X_train))

    # Log the model
    model_info = mlflow.sklearn.log_model(
        sk_model=lr,
        artifact_path="iris_model",
        signature=signature,
        input_example=X_train,
        registered_model_name="tracking-quickstart",
    )


Registered model 'tracking-quickstart' already exists. Creating a new version of this model...
2024/11/18 12:33:02 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: tracking-quickstart, version 3
Created version '3' of model 'tracking-quickstart'.
  from .autonotebook import tqdm as notebook_tqdm
Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 23.50it/s] 
2024/11/18 12:33:03 INFO mlflow.tracking._tracking_service.client: 🏃 View run secretive-hawk-20 at: http://127.0.0.1:8080/#/experiments/848671246422989337/runs/13aa4c31deca48e7aa027f0260424991.
2024/11/18 12:33:03 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:8080/#/experiments/848671246422989337.


In [None]:
model_info, model_info.model_uri

(<mlflow.models.model.ModelInfo at 0x1af89d9f490>,
 'runs:/e1d5ab7369e54136be9cbfbedfa60f7a/iris_model')

In [None]:
# Load the model back for predictions as a generic Python Function model
loaded_model = mlflow.pyfunc.load_model(model_info.model_uri)

predictions = loaded_model.predict(X_test)

iris_feature_names = datasets.load_iris().feature_names

result = pd.DataFrame(X_test, columns=iris_feature_names)
result["actual_class"] = y_test
result["predicted_class"] = predictions

result[:4]


Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 19.27it/s]


Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),actual_class,predicted_class
0,6.1,2.8,4.7,1.2,1,1
1,5.7,3.8,1.7,0.3,0,0
2,7.7,2.6,6.9,2.3,2,2
3,6.0,2.9,4.5,1.5,1,1


Method-2

In [13]:
%pip install -q mlflow databricks-sdk


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.2 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
import mlflow

mlflow.login()


mlflow.autolog

In [6]:
import mlflow

from sklearn.model_selection import train_test_split
from sklearn.datasets import load_diabetes
from sklearn.ensemble import RandomForestRegressor

mlflow.autolog()

db = load_diabetes()
X_train, X_test, y_train, y_test = train_test_split(db.data, db.target)

rf = RandomForestRegressor(n_estimators=100, max_depth=6, max_features=3)
# MLflow triggers logging automatically upon model fitting
rf.fit(X_train, y_train)


2024/11/18 14:25:53 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.
2024/11/18 14:25:54 INFO mlflow.tracking.fluent: Autologging successfully enabled for pyspark.
2024/11/18 14:25:54 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '0c986946342d46bea89d8adfb6cda5cc', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow
2024/11/18 14:26:11 INFO mlflow.tracking._tracking_service.client: 🏃 View run handsome-conch-487 at: http://127.0.0.1:8080/#/experiments/848671246422989337/runs/0c986946342d46bea89d8adfb6cda5cc.
2024/11/18 14:26:11 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:8080/#/experiments/848671246422989337.


Customize Autologging Behavior

In [9]:
import mlflow

mlflow.autolog(
    log_model_signatures=False,
    extra_tags={"DEMO_TAG": "ANANT"},
)


2024/11/18 14:30:30 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.
2024/11/18 14:30:30 INFO mlflow.tracking.fluent: Autologging successfully enabled for pyspark.


In [10]:
db = load_diabetes()
X_train, X_test, y_train, y_test = train_test_split(db.data, db.target)

rf = RandomForestRegressor(n_estimators=100, max_depth=6, max_features=3)
# MLflow triggers logging automatically upon model fitting
rf.fit(X_train, y_train)

2024/11/18 14:30:31 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '0ad78bf25ec0429ab9432bf97e59db68', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow
2024/11/18 14:30:42 INFO mlflow.tracking._tracking_service.client: 🏃 View run overjoyed-carp-510 at: http://127.0.0.1:8080/#/experiments/848671246422989337/runs/0ad78bf25ec0429ab9432bf97e59db68.
2024/11/18 14:30:42 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:8080/#/experiments/848671246422989337.


Enable / Disable Autologging for Specific Libraries

In [12]:
import torch

ModuleNotFoundError: No module named 'torch'

In [11]:
import mlflow

# Option 1: Enable autologging only for PyTorch
mlflow.pytorch.autolog()

# Option 2: Disable autologging for scikit-learn, but enable it for other libraries
mlflow.sklearn.autolog(disable=True)
mlflow.autolog()


ModuleNotFoundError: No module named 'torch'

In [None]:
db = load_diabetes()
X_train, X_test, y_train, y_test = train_test_split(db.data, db.target)

rf = RandomForestRegressor(n_estimators=100, max_depth=6, max_features=3)
# MLflow triggers logging automatically upon model fitting
rf.fit(X_train, y_train)