# MLflow Training Demo

Creates a model for predicting the quality of wine using [xgboost.XGBRegressor](https://xgboost.readthedocs.io/en/stable/python/python_api.html).  We perform a naive search of the hyperparameter space in order to determine the optimal values.

The results of the model training runs are tracked in an MLflow experiment. The best performing model is then registered in the model registry and set to the `Production` stage for usage.

> This notebook is based on `train.py` from the MLflow example [xgboost_sklearn](https://github.com/mlflow/mlflow/tree/master/examples/xgboost/xgboost_sklearn).

Attribution
* The data set used in this example is from http://archive.ics.uci.edu/ml/datasets/Wine+Quality and sourced from [here](https://raw.githubusercontent.com/mlflow/mlflow/master/examples/sklearn_elasticnet_wine/wine-quality.csv).
* P. Cortez, A. Cerdeira, F. Almeida, T. Matos and J. Reis.
* Modeling wine preferences by data mining from physicochemical properties. In Decision Support Systems, Elsevier, 47(4):547-553, 2009.


In [1]:
import warnings

import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet
from mlflow.models.signature import infer_signature
import mlflow.sklearn
import mlflow

from mlflow_adsp import create_unique_name, upsert_experiment

warnings.filterwarnings("ignore")
np.random.seed(42)

In [2]:
"""
Load user specific configuration.
"""

from ae5_tools import load_ae5_user_secrets

# NOTE(review): presumably makes the current user's AE5 secrets (e.g. the MLflow
# connection settings) available to this session — confirm against `ae5_tools`.
load_ae5_user_secrets()

## Tracking Setup

Create our experiment to track all our model training runs in.

* This experiment is used across runs of the notebook and will not be recreated if it already exists.
* The name of the experiment is defined as an anaconda project variable located within `anaconda-project.yml`.
    * The variable name is `MLFLOW_EXPERIMENT_NAME`, and the default value is `demo_sklearn_elasticnet_wine`.

In [3]:
from mlflow.tracking import MlflowClient

# Create a single MlflowClient up front; later cells reuse it for the model
# registry, run look-ups, and the stage transition.
client = MlflowClient()

In [4]:
from mlflow.exceptions import MlflowException


def upsert_model_registry(client: MlflowClient) -> None:
    """
    Ensure the registered model for this project exists, creating it when missing.

    The registered model is named after the `MLFLOW_EXPERIMENT_NAME` environment variable.

    Parameters
    ----------
    client: MlflowClient
        Client used to talk to the model registry.

    Raises
    ------
    KeyError
        If `MLFLOW_EXPERIMENT_NAME` is not set.
    MlflowException
        For any registry failure other than the model already existing.
    """
    # Imported locally so this cell does not depend on a later cell having imported `os`.
    import os

    try:
        client.create_registered_model(name=os.environ["MLFLOW_EXPERIMENT_NAME"])
    except MlflowException as error:
        # An already-registered model is the desired end state; anything else is a real failure.
        if error.error_code != "RESOURCE_ALREADY_EXISTS":
            raise

In [5]:
"""
Ensure that the experiment and model registry exist for reporting and tracking.
"""

import os
from mlflow.entities import Experiment

# `upsert_experiment()` supplies the ID of the experiment to activate; the returned
# Experiment is kept so later cells can reference `experiment.experiment_id`.
experiment: Experiment = mlflow.set_experiment(experiment_id=upsert_experiment())
# Make sure the registered model that later cells add versions to is present.
upsert_model_registry(client=client)

# Training

In [6]:
"""
Data Preparation
Loads the data from csv file, and returns our train, test splits for training.
"""

from pydantic import BaseModel


class DataSet(BaseModel):
    """Train/test feature and target frames, as produced by `prepare_data`."""

    # Feature columns (every column except `quality`) for the training split.
    X_train: pd.DataFrame
    # Feature columns for the held-out split.
    X_test: pd.DataFrame
    # Single-column `quality` target for the training split.
    y_train: pd.DataFrame
    # Single-column `quality` target for the held-out split.
    y_test: pd.DataFrame

    class Config:
        # Needed so pydantic accepts `pd.DataFrame` as a field type.
        arbitrary_types_allowed = True


def prepare_data(csv_url: str, *, random_state: "int | None" = None) -> DataSet:
    """
    Load the wine-quality data and split it into train and test sets.

    Parameters
    ----------
    csv_url: str
        Path or URL of the comma-separated data file. It must contain a `quality` column.
    random_state: int, optional
        Seed forwarded to `train_test_split` so the same split can be reproduced.
        When omitted the split is drawn from NumPy's global random state, which is
        the behavior this function had before the parameter existed.

    Returns
    -------
    DataSet
        The feature frames (`X_train`, `X_test`) and the single-column `quality`
        target frames (`y_train`, `y_test`).
    """
    data: pd.DataFrame = pd.read_csv(csv_url, sep=",")

    # The predicted column is `quality`, which is a scalar from [3, 9]
    X: pd.DataFrame = data.drop(["quality"], axis=1)
    # Double brackets keep the target as a one-column DataFrame rather than a Series.
    y: pd.DataFrame = data[["quality"]]

    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=random_state)
    return DataSet(X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test)

In [7]:
"""
Model Training Function
"""

import os

import xgboost as xgb
from sklearn.model_selection import train_test_split

import mlflow
import mlflow.xgboost


class HyperParameters(BaseModel):
    """Settings for one training run; `train` forwards each field to `xgb.XGBRegressor`."""

    # Every field below is passed to the `XGBRegressor` constructor argument of the same name.
    n_estimators: int
    max_depth: int
    reg_lambda: float
    gamma: float
    early_stopping_rounds: int


def train(ds: DataSet, parameters: HyperParameters) -> str:
    """
    Fit an XGBoost regressor inside a new MLflow run and report which run tracked it.

    Parameters
    ----------
    ds: DataSet
        Training frames used for fitting, plus the test frames used as the evaluation set.
    parameters: HyperParameters
        Values forwarded to the `XGBRegressor` constructor.

    Returns
    -------
    str
        The ID of the MLflow run that recorded the training.
    """
    tracked_run_name = create_unique_name(name=os.environ["MLFLOW_EXPERIMENT_NAME"])

    with mlflow.start_run(run_name=tracked_run_name) as active_run:
        # Switch on XGBoost autologging before the model is built and fitted.
        mlflow.xgboost.autolog()

        # https://xgboost.readthedocs.io/en/stable/python/python_api.html
        booster_settings = {
            "n_estimators": parameters.n_estimators,
            "max_depth": parameters.max_depth,
            "reg_lambda": parameters.reg_lambda,
            "gamma": parameters.gamma,
            "early_stopping_rounds": parameters.early_stopping_rounds,
        }
        model = xgb.XGBRegressor(**booster_settings)

        # The held-out split doubles as the evaluation set monitored during fitting.
        validation_pairs = [(ds.X_test, ds.y_test)]
        model.fit(X=ds.X_train, y=ds.y_train, eval_set=validation_pairs, verbose=False)

        # Captured for training run comparisons.
        finished_run_id = active_run.info.run_id

    return finished_run_id

# Train a single model

In [8]:
from mlflow.entities import Run

# Build the train/test split; the evaluation cells further down reuse `data_set`.
data_set: DataSet = prepare_data(csv_url="datasets/wine-quality.csv")
parameters = HyperParameters(n_estimators=18, max_depth=10, reg_lambda=1, gamma=0, early_stopping_rounds=10)

run_id: str = train(ds=data_set, parameters=parameters)
# The run ID is already known, so fetch the run directly rather than filtering an
# experiment-wide search and indexing into the result list.
stand_alone_run: Run = client.get_run(run_id)

print(f"Run ID: {run_id}")
print(stand_alone_run.data.metrics)

Run ID: 358ed7cb7ceb4806a2846a79592a5221
{'stopped_iteration': 17.0, 'best_iteration': 17.0, 'validation_0-rmse': 0.6415873227145958}


# Determine the performance of the `Production` Model

In [11]:
import os

import mlflow

# Build the registry URI from the same environment variable the registration and
# promotion cells use, so a non-default model name still resolves to the right model.
logged_model = f"models:/{os.environ['MLFLOW_EXPERIMENT_NAME']}/Production"

# Load model as a PyFuncModel.
loaded_model = mlflow.pyfunc.load_model(logged_model, suppress_warnings=True)

In [42]:
from sklearn.metrics import classification_report

# Performance on Training data
# Predictions are rounded to whole numbers so they can be scored against the
# integer quality labels.
y_pred: pd.DataFrame = (
    pd.DataFrame(loaded_model.predict(data_set.X_train), columns=["quality"]).round().astype(dtype=int)
)
print(
    classification_report(
        y_true=data_set.y_train,
        y_pred=y_pred,
        labels=list(range(3, 10)),
    )
)

              precision    recall  f1-score   support

           3       0.00      0.00      0.00        13
           4       0.00      0.00      0.00       131
           5       0.54      0.26      0.35      1099
           6       0.49      0.87      0.63      1654
           7       0.46      0.14      0.22       639
           8       0.00      0.00      0.00       132
           9       0.00      0.00      0.00         5

    accuracy                           0.49      3673
   macro avg       0.21      0.18      0.17      3673
weighted avg       0.46      0.49      0.42      3673



In [43]:
from sklearn.metrics import classification_report

# Performance on Test data
# Predictions are rounded to whole numbers so they can be scored against the
# integer quality labels.
y_pred: pd.DataFrame = (
    pd.DataFrame(loaded_model.predict(data_set.X_test), columns=["quality"]).round().astype(dtype=int)
)
print(
    classification_report(
        y_true=data_set.y_test,
        y_pred=y_pred,
        labels=list(range(3, 10)),
    )
)

              precision    recall  f1-score   support

           3       0.00      0.00      0.00         7
           4       0.00      0.00      0.00        32
           5       0.47      0.23      0.31       358
           6       0.48      0.86      0.62       544
           7       0.55      0.17      0.26       241
           8       0.00      0.00      0.00        43
           9       0.00      0.00      0.00         0

   micro avg       0.48      0.48      0.48      1225
   macro avg       0.21      0.18      0.17      1225
weighted avg       0.46      0.48      0.42      1225



# Perform a naive search of the hyperparameter space

We will naively review model performance at specific intervals across the solution space.  There are many optimization functions which can be leveraged based on business needs.

In [None]:
from tqdm import trange

# IDs of every run started by the sweep, in the order they were trained.
runs: list[str] = []

# Load and split the data once so that every candidate is trained and validated on the
# same split. Re-splitting per run would re-read the CSV each time and would make the
# validation scores that are compared afterwards come from different hold-out sets.
data_set: DataSet = prepare_data(csv_url="datasets/wine-quality.csv")

for i in trange(3, 9):
    # n_estimators takes the odd values 7, 9, ..., 17.
    n_estimators: int = i * 2 + 1
    for j in range(3, 9):
        # max_depth takes the values 6, 7, ..., 11.
        max_depth: int = j + 3
        parameters = HyperParameters(
            n_estimators=n_estimators,
            max_depth=max_depth,
            reg_lambda=1,
            gamma=0,
            early_stopping_rounds=10,
        )
        run_id: str = train(ds=data_set, parameters=parameters)
        runs.append(run_id)

# Find and register the best model
Define our functions

In [None]:
from mlflow.tracking import MlflowClient
from mlflow.entities import Run
from typing import Optional


def get_best_run(client: MlflowClient, experiment_id, runs: list[str]) -> tuple[Optional[Run], dict]:
    """
    Pick the run with the lowest `validation_0-rmse` among the given run IDs.

    Parameters
    ----------
    client: MlflowClient
        Client used to look up each run.
    experiment_id:
        Experiment the runs are searched in.
    runs: list[str]
        IDs of the candidate runs, examined in order.

    Returns
    -------
    tuple[Optional[Run], dict]
        The winning run together with its metrics. When no candidate reports the
        metric, the run is None and the dict holds only the placeholder score.
        On a tie the earliest candidate wins.
    """
    metric_key = "validation_0-rmse"

    # Start from the largest finite float64 so any realistic score beats it.
    lowest_score = np.finfo(np.float64).max
    winner: Optional[Run] = None
    winner_metrics: dict = {metric_key: lowest_score}

    for candidate_id in runs:
        candidate: Run = client.search_runs([experiment_id], f"attributes.run_id = '{candidate_id}'")[0]
        recorded = candidate.data.metrics

        # Runs that never logged the metric cannot compete.
        if metric_key not in recorded:
            continue
        # Only a strictly lower score replaces the current winner.
        if not recorded[metric_key] < lowest_score:
            continue

        lowest_score = recorded[metric_key]
        winner, winner_metrics = candidate, recorded

    return winner, winner_metrics

In [None]:
from mlflow.entities.model_registry import ModelVersion


def register_best_model(client: MlflowClient, run: Run) -> ModelVersion:
    """
    Register the model logged by `run` as a new version in the model registry.

    The version is created under the registered model named by the
    `MLFLOW_EXPERIMENT_NAME` environment variable, points at the `model` folder
    beneath the run's artifact URI, and is tagged with the originating run ID.

    Parameters
    ----------
    client: MlflowClient
        Client used to create the model version.
    run: Run
        The run whose logged model should be registered.

    Returns
    -------
    ModelVersion
        The model version returned by the registry.
    """
    registry_name = os.environ["MLFLOW_EXPERIMENT_NAME"]
    model_artifact_uri = f"{run.info.artifact_uri}/model"
    source_run_id = run.info.run_id

    return client.create_model_version(
        name=registry_name,
        source=model_artifact_uri,
        run_id=source_run_id,
        tags={"run_id": source_run_id},
    )

## Review the runs for the best performing model and add it to the model registry

In [None]:
(best_run, metrics) = get_best_run(client=client, experiment_id=experiment.experiment_id, runs=runs)

# `get_best_run` yields None when no run reported `validation_0-rmse` (for example when
# `runs` is empty); stop with a clear message instead of failing inside `register_best_model`.
if best_run is None:
    raise RuntimeError("No training run with a 'validation_0-rmse' metric was found; nothing to register.")

model_version: ModelVersion = register_best_model(client=client, run=best_run)

print(f"Run ID: {best_run.info.run_id}")
print(f"Report: {metrics}")

## Promote the latest model to the `Production` stage for usage.

In [None]:
# Move the version registered above into the `Production` stage. `model_version` is
# rebound to the object returned by the transition call.
model_version: ModelVersion = client.transition_model_version_stage(
    name=os.environ["MLFLOW_EXPERIMENT_NAME"],
    version=model_version.version,
    stage="Production",
    # Ask the registry to archive versions already in the target stage.
    archive_existing_versions=True,
)