In [26]:
import emoji

import mlflow
from mlflow.models import infer_signature
from mlflow import MlflowClient

import sklearn.datasets
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

import xgboost as xgb

import ray
from ray import train, tune
from ray.air.integrations.mlflow import setup_mlflow

# --- MLflow tracking ---------------------------------------------------------
# NOTE(review): 0.0.0.0 is normally a server bind address; confirm whether
# clients should point at localhost / 127.0.0.1 instead.
TRACKING_URI = "http://0.0.0.0:5000"
EXP_NAME = "ml_platform"

# --- Model registry ----------------------------------------------------------
MODEL_NAME = "xgb-breast-cancer-classifer"
ALIAS = "champion"
# Threshold the best run's accuracy is compared against before registration.
MINIMUM_REQUIRED_ACCURACY = 0.95

# Point the fluent API at the tracking server and select the experiment.
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXP_NAME)

# Client used for model-registry operations.
client = MlflowClient()

In [3]:
def train_breast_cancer(config: dict) -> None:
    """Train and score one XGBoost classifier for a single Ray Tune trial.

    Loads the scikit-learn breast-cancer dataset, fits an ``XGBClassifier``
    built from ``config``, logs the hold-out accuracy and the fitted model to
    MLflow, and reports the accuracy back to Ray Tune.

    Args:
        config: Hyperparameters for this trial. The dict is handed to
            ``setup_mlflow`` and unpacked as keyword arguments into
            ``xgb.XGBClassifier``.
    """
    setup_mlflow(
        config,
        experiment_name=EXP_NAME,
        tracking_uri=TRACKING_URI,
    )
    # Load dataset
    data, labels = sklearn.datasets.load_breast_cancer(return_X_y=True)
    # Fixed seed: every trial is scored on the same hold-out rows, so accuracy
    # differences come from the hyperparameters rather than from split luck,
    # and re-running the search gives comparable numbers.
    train_x, test_x, train_y, test_y = train_test_split(
        data, labels, test_size=0.2, random_state=42
    )

    model = xgb.XGBClassifier(**config)
    model.fit(train_x, train_y)

    predictions = model.predict(test_x)
    accuracy = accuracy_score(test_y, predictions)
    # The signature records the input/output schema alongside the model.
    signature = infer_signature(train_x, model.predict(train_x))

    mlflow.log_metric("accuracy", accuracy)
    mlflow.xgboost.log_model(
        model,
        "xgb_models",
        conda_env=mlflow.xgboost.get_default_conda_env(),
        signature=signature,
        model_format="json",
    )

    # Single report per trial; "done" marks the trial as finished.
    train.report({"accuracy": accuracy, "done": True})


def tune_with_setup() -> None:
    """Run the Ray Tune hyperparameter search over ``train_breast_cancer``.

    Starts Ray (if it is not already running), requests 2 CPUs per trial,
    and launches 20 samples drawn from the XGBoost search space below. Each
    trial logs to MLflow from inside ``train_breast_cancer``; nothing is
    returned.
    """
    # ignore_reinit_error makes the cell safe to re-run: a second ray.init()
    # in the same kernel would otherwise raise RuntimeError. When Ray is
    # already up, the existing instance is reused.
    ray.init(num_cpus=6, ignore_reinit_error=True)
    trainable_with_resources = tune.with_resources(train_breast_cancer, {"cpu": 2})

    tuner = tune.Tuner(
        trainable_with_resources,
        tune_config=tune.TuneConfig(
            num_samples=20,
        ),
        run_config=train.RunConfig(
            name="mlflow",
        ),
        param_space={
            # Fixed settings shared by every trial.
            "objective": "binary:logistic",
            "eval_metric": ["logloss", "error"],
            # Sampled hyperparameters.
            "max_depth": tune.randint(1, 9),
            "min_child_weight": tune.choice([1, 2, 3]),
            "subsample": tune.uniform(0.5, 1.0),
            "eta": tune.loguniform(1e-4, 1e-1),
        },
    )

    tuner.fit()

In [4]:
# Launch the hyperparameter search; the trial table below is its output.
tune_with_setup()

0,1
Current time:,2025-01-27 23:48:09
Running for:,00:00:31.43
Memory:,9.0/62.5 GiB

Trial name,status,loc,eta,max_depth,min_child_weight,subsample,iter,total time (s),accuracy
train_breast_cancer_b5094_00000,TERMINATED,192.168.0.64:858771,0.0123832,5,1,0.516165,1,2.17412,0.95614
train_breast_cancer_b5094_00001,TERMINATED,192.168.0.64:858770,0.000135785,6,3,0.696799,1,2.25327,0.596491
train_breast_cancer_b5094_00002,TERMINATED,192.168.0.64:858772,0.0016692,4,3,0.857644,1,1.9505,0.701754
train_breast_cancer_b5094_00003,TERMINATED,192.168.0.64:859107,0.0157963,1,1,0.756797,1,1.07892,0.938596
train_breast_cancer_b5094_00004,TERMINATED,192.168.0.64:859163,0.000317742,3,1,0.688255,1,1.17834,0.684211
train_breast_cancer_b5094_00005,TERMINATED,192.168.0.64:859162,0.0489526,6,3,0.516478,1,1.44817,0.95614
train_breast_cancer_b5094_00006,TERMINATED,192.168.0.64:859454,0.00160845,4,3,0.589493,1,0.981803,0.570175
train_breast_cancer_b5094_00007,TERMINATED,192.168.0.64:859518,0.00233215,6,3,0.996853,1,1.15156,0.903509
train_breast_cancer_b5094_00008,TERMINATED,192.168.0.64:859519,0.00414506,3,3,0.907776,1,1.08388,0.903509
train_breast_cancer_b5094_00009,TERMINATED,192.168.0.64:859785,0.00940425,2,3,0.76735,1,1.15563,0.947368


2025-01-27 23:48:09,428	INFO tune.py:1009 -- Wrote the latest version of all result files and experiment state to '/home/ssafarveisi/ray_results/mlflow' in 0.0043s.
2025-01-27 23:48:09,435	INFO tune.py:1041 -- Total run time: 31.56 seconds (31.42 seconds for the tuning loop).


In [5]:
from mlflow.entities import ViewType

# Resolve the experiment by name, then fetch its runs in the ACTIVE_ONLY view.
experiment = mlflow.get_experiment_by_name(EXP_NAME)
experiment_id = experiment.experiment_id
runs = mlflow.search_runs(
    experiment_ids=[experiment_id],
    run_view_type=ViewType.ACTIVE_ONLY,
)

In [13]:
# Select the row with the highest logged accuracy and build the URI of the
# model artifact that run stored under "xgb_models".
best_idx = runs["metrics.accuracy"].idxmax()
best_run = runs.loc[best_idx]
best_run_id = best_run["run_id"]
best_run_accuracy = best_run["metrics.accuracy"]
model_uri = "runs:/{}/xgb_models".format(best_run_id)

In [34]:
# Reset first so a `result` left over from an earlier execution of this cell
# can never be mistaken for the outcome of the current check.
result = None
if best_run_accuracy > MINIMUM_REQUIRED_ACCURACY:
    print(emoji.emojize("Model accuracy met the required minimum accuracy :fire:"))
    # Creates the registered model, or a new version if it already exists.
    result = mlflow.register_model(model_uri, MODEL_NAME)
else:
    print(
        emoji.emojize(
            "Best run did not meet the required minimum accuracy :sad_but_relieved_face:"
        )
    )

Registered model 'xgb-breast-cancer-classifer' already exists. Creating a new version of this model...
2025/01/28 00:07:06 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: xgb-breast-cancer-classifer, version 6


Model accuracy met the required minimum accuracy 🔥


Created version '6' of model 'xgb-breast-cancer-classifer'.


In [28]:
# Move the alias only when the best run cleared the accuracy gate. Without
# this guard the cell either fails with NameError (no `result` on a fresh
# kernel) or promotes a version left over from an earlier execution.
if best_run_accuracy > MINIMUM_REQUIRED_ACCURACY:
    client.set_registered_model_alias(MODEL_NAME, ALIAS, result.version)
else:
    print(
        f"Alias '{ALIAS}' left unchanged: best accuracy {best_run_accuracy:.4f} "
        f"does not exceed {MINIMUM_REQUIRED_ACCURACY}."
    )

In [29]:
# Build the registry URI for the aliased version and load it as a pyfunc model.
champion_uri = f"models:/{MODEL_NAME}@{ALIAS}"
champion_version = mlflow.pyfunc.load_model(champion_uri)

In [30]:
data, labels = sklearn.datasets.load_breast_cancer(return_X_y=True)
# Seed the split so this cell produces the same predictions on every run.
# NOTE(review): these rows are a genuine hold-out set only if the champion was
# trained on the complementary split (same seed and test_size); otherwise some
# of them may have been seen during training — confirm before reading the
# predictions as an unbiased evaluation.
_, test_x, _, test_y = train_test_split(
    data, labels, test_size=0.2, random_state=42
)

champion_version.predict(test_x)

array([1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1,
       1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0,
       0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1,
       1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1,
       0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1,
       1, 0, 1, 0])