In [7]:
pip install --upgrade pip

Note: you may need to restart the kernel to use updated packages.


In [8]:
!pip install mlflow

Collecting mlflow
  Downloading mlflow-2.16.1-py3-none-any.whl.metadata (29 kB)
Collecting mlflow-skinny==2.16.1 (from mlflow)
  Downloading mlflow_skinny-2.16.1-py3-none-any.whl.metadata (30 kB)
Collecting Flask<4 (from mlflow)
  Downloading flask-3.0.3-py3-none-any.whl.metadata (3.2 kB)
Collecting alembic!=1.10.0,<2 (from mlflow)
  Downloading alembic-1.13.2-py3-none-any.whl.metadata (7.4 kB)
Collecting docker<8,>=4.0.0 (from mlflow)
  Downloading docker-7.1.0-py3-none-any.whl.metadata (3.8 kB)
Collecting graphene<4 (from mlflow)
  Downloading graphene-3.3-py2.py3-none-any.whl.metadata (7.7 kB)
Collecting pyarrow<18,>=4.0.0 (from mlflow)
  Downloading pyarrow-17.0.0-cp311-cp311-win_amd64.whl.metadata (3.4 kB)
Collecting scikit-learn<2 (from mlflow)
  Downloading scikit_learn-1.5.2-cp311-cp311-win_amd64.whl.metadata (13 kB)
Collecting scipy<2 (from mlflow)
  Downloading scipy-1.14.1-cp311-cp311-win_amd64.whl.metadata (60 kB)
Collecting sqlalchemy<3,>=1.4.0 (from mlflow)
  Downloading 

In [9]:
import mlflow

from sklearn.datasets import load_diabetes
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

# Fixed seed so the train/test split and the fitted forest are the same on
# every re-run; without it each execution logs different metrics.
SEED = 42

# Turn on MLflow autologging before any model is fitted.
mlflow.autolog()

# Load the diabetes dataset and split it. No test_size is passed, so
# scikit-learn's default split proportion applies.
db = load_diabetes()
X_train, X_test, y_train, y_test = train_test_split(
    db.data, db.target, random_state=SEED
)

# Create and train the model.
rf = RandomForestRegressor(
    n_estimators=100, max_depth=6, max_features=3, random_state=SEED
)
rf.fit(X_train, y_train)

# Use the model to make predictions on the test dataset.
predictions = rf.predict(X_test)

2024/09/16 20:54:16 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.
2024/09/16 20:54:17 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID 'de446d5c3402443ba3778e02005dafa1', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow


In [None]:
# Launch the MLflow UI from the shell, listening on port 5001.
# NOTE(review): `mlflow ui` presumably runs a server in the foreground, so this
# cell likely stays busy until the kernel is interrupted — confirm.
!mlflow ui --port=5001

In [None]:
import mlflow

from sklearn.datasets import load_diabetes
# RandomForestRegressor is imported here so the cell does not depend on an
# import executed in an earlier cell.
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

# Fixed seed so the train/test split and the fitted forest are the same on
# every re-run.
SEED = 42

# Make the experiment with ID "0" the active one for the runs below.
mlflow.set_experiment(experiment_id="0")

# Turn on MLflow autologging before any model is fitted.
mlflow.autolog()

# Load the diabetes dataset and split it into train and test parts.
db = load_diabetes()
X_train, X_test, y_train, y_test = train_test_split(
    db.data, db.target, random_state=SEED
)

# Create and train the model.
rf = RandomForestRegressor(
    n_estimators=100, max_depth=6, max_features=3, random_state=SEED
)
rf.fit(X_train, y_train)

# Use the model to make predictions on the test dataset.
predictions = rf.predict(X_test)

In [None]:
# Launch the MLflow UI from the shell, listening on port 5001.
# NOTE(review): `mlflow ui` presumably runs a server in the foreground, so this
# cell likely stays busy until the kernel is interrupted — confirm.
!mlflow ui --port=5001

### Store ML models

In [None]:
import mlflow
from mlflow.models import infer_signature

from sklearn.datasets import load_diabetes
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

# Fixed seed so the train/test split, the fitted forest and therefore the
# stored model and printed predictions are the same on every re-run.
SEED = 42

# Everything inside this block happens while the MLflow run `run` is open.
with mlflow.start_run() as run:
    # Load the diabetes dataset.
    db = load_diabetes()
    X_train, X_test, y_train, y_test = train_test_split(
        db.data, db.target, random_state=SEED
    )

    # Create and train the model.
    rf = RandomForestRegressor(
        n_estimators=100, max_depth=6, max_features=3, random_state=SEED
    )
    rf.fit(X_train, y_train)

    # Use the model to make predictions on the test dataset.
    predictions = rf.predict(X_test)
    print(predictions)

    # Infer the model signature from the test inputs and the predictions,
    # then log the fitted model under the artifact path "model".
    signature = infer_signature(X_test, predictions)
    mlflow.sklearn.log_model(rf, "model", signature=signature)

    # Print the run ID so the stored model can be located later.
    print(f"Run ID: {run.info.run_id}")

In [None]:
# Launch the MLflow UI from the shell, listening on port 5001.
# NOTE(review): `mlflow ui` presumably runs a server in the foreground, so this
# cell likely stays busy until the kernel is interrupted — confirm.
!mlflow ui --port=5001

### Load a model

In [None]:
import mlflow

from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

# Fixed seed so the rows selected as X_test, and therefore the printed
# predictions, are the same on every re-run.
SEED = 42

# Rebuild a test set from the diabetes dataset to feed the loaded model.
db = load_diabetes()
X_train, X_test, y_train, y_test = train_test_split(
    db.data, db.target, random_state=SEED
)

# Load a previously stored scikit-learn model from a local artifact directory.
# NOTE(review): the path is relative to the working directory and embeds one
# specific run ID, so it presumably only resolves where that run exists —
# confirm, or substitute the "Run ID" printed when the model was stored.
model = mlflow.sklearn.load_model("mlruns/0/a7edd2653458481ab60488297d06a781/artifacts/model/")

# Predict on the test rows and show the result.
predictions = model.predict(X_test)
print(predictions)