In [15]:
import mlflow
from mlflow.models import infer_signature
from mlflow import MlflowClient

import sklearn.datasets
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

import xgboost as xgb

import ray
from ray import train, tune
from ray.air.integrations.mlflow import setup_mlflow

# --- MLflow configuration ----------------------------------------------------
# Tracking server endpoint and the experiment that collects the tuning runs.
TRACKING_URI = "http://0.0.0.0:5000"
EXP_NAME = "ml_platform"

# Registry name and alias under which the selected model is published.
MODEL_NAME = "xgb-breast-cancer-classifer"
ALIAS = "champion"

# Point the fluent MLflow API at the tracking server and select the experiment.
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXP_NAME)

# Low-level client, used further down to manage registered-model aliases.
client = MlflowClient()

In [18]:
def train_breast_cancer(config: dict) -> None:
    """Train and score one XGBoost classifier for a single Ray Tune trial.

    The trial connects to the MLflow tracking server, fits an
    ``xgb.XGBClassifier`` on an 80/20 split of the scikit-learn breast-cancer
    dataset, logs the hold-out accuracy and the fitted model to MLflow, and
    reports the accuracy back to Ray Tune.

    Args:
        config: Hyperparameters for this trial. Passed to ``setup_mlflow`` and
            forwarded verbatim as keyword arguments to ``xgb.XGBClassifier``.
    """
    setup_mlflow(
        config,
        experiment_name=EXP_NAME,
        tracking_uri=TRACKING_URI,
    )
    # Load dataset
    data, labels = sklearn.datasets.load_breast_cancer(return_X_y=True)
    # Fixed seed: every trial is scored on the same held-out rows, so accuracy
    # differences between trials reflect the hyperparameters rather than the
    # luck of the split, and the sweep is reproducible across reruns.
    train_x, test_x, train_y, test_y = train_test_split(
        data, labels, test_size=0.2, random_state=42
    )

    model = xgb.XGBClassifier(**config)
    model.fit(train_x, train_y)

    predictions = model.predict(test_x)
    accuracy = accuracy_score(test_y, predictions)
    mlflow.log_metric("accuracy", accuracy)
    # Input/output schema stored alongside the model artifact.
    signature = infer_signature(train_x, model.predict(train_x))

    mlflow.xgboost.log_model(
        model,
        "xgb_models",
        conda_env=mlflow.xgboost.get_default_conda_env(),
        signature=signature,
        model_format="json",
    )

    # Single report per trial; "done" marks the trial as finished.
    train.report({"accuracy": accuracy, "done": True})


def tune_with_setup() -> None:
    """Run the Ray Tune hyperparameter sweep over ``train_breast_cancer``.

    Starts (or reuses) a local Ray runtime with 6 CPUs, gives each trial
    2 CPUs, and samples 10 configurations from the search space below.
    Results are logged by the trainable itself; nothing is returned.
    """
    # ignore_reinit_error lets this cell be re-run in a live kernel: without
    # it, a second ray.init() call raises because Ray is already initialised.
    ray.init(num_cpus=6, ignore_reinit_error=True)
    # 6 CPUs in total / 2 per trial -> up to three trials run concurrently.
    trainable_with_resources = tune.with_resources(train_breast_cancer, {"cpu": 2})

    tuner = tune.Tuner(
        trainable_with_resources,
        tune_config=tune.TuneConfig(
            num_samples=10,
        ),
        run_config=train.RunConfig(
            name="mlflow",
        ),
        param_space={
            # Fixed settings shared by every trial.
            "objective": "binary:logistic",
            "eval_metric": ["logloss", "error"],
            # Sampled hyperparameters.
            "max_depth": tune.randint(1, 9),
            "min_child_weight": tune.choice([1, 2, 3]),
            "subsample": tune.uniform(0.5, 1.0),
            "eta": tune.loguniform(1e-4, 1e-1),
        },
    )

    tuner.fit()

In [19]:
# Launch the hyperparameter sweep; each trial logs its accuracy and model to
# MLflow, and the per-trial status table appears in this cell's output.
tune_with_setup()

0,1
Current time:,2025-01-27 18:41:38
Running for:,00:00:14.66
Memory:,10.6/62.5 GiB

Trial name,status,loc,eta,max_depth,min_child_weight,subsample,iter,total time (s),accuracy
train_breast_cancer_ed212_00000,TERMINATED,10.23.68.39:844934,0.00543655,8,1,0.550216,1,1.00867,0.938596
train_breast_cancer_ed212_00001,TERMINATED,10.23.68.39:844932,0.00693825,6,1,0.707401,1,0.994777,0.95614
train_breast_cancer_ed212_00002,TERMINATED,10.23.68.39:844933,0.000108335,3,1,0.704784,1,1.4965,0.587719
train_breast_cancer_ed212_00003,TERMINATED,10.23.68.39:845317,0.0308117,5,1,0.569257,1,1.27351,0.95614
train_breast_cancer_ed212_00004,TERMINATED,10.23.68.39:845318,0.016026,4,2,0.984724,1,0.822576,0.947368
train_breast_cancer_ed212_00005,TERMINATED,10.23.68.39:845429,0.079856,5,3,0.621502,1,0.941563,0.964912
train_breast_cancer_ed212_00006,TERMINATED,10.23.68.39:845729,0.00387077,6,3,0.588911,1,1.04974,0.938596
train_breast_cancer_ed212_00007,TERMINATED,10.23.68.39:845728,0.000284779,6,3,0.586301,1,0.877857,0.605263
train_breast_cancer_ed212_00008,TERMINATED,10.23.68.39:845735,0.000419796,7,1,0.50715,1,0.830408,0.719298
train_breast_cancer_ed212_00009,TERMINATED,10.23.68.39:846032,0.00025741,7,1,0.663386,1,0.93089,0.614035


2025-01-27 18:41:38,331	INFO tune.py:1009 -- Wrote the latest version of all result files and experiment state to '/home/ssafarveisi/ray_results/mlflow' in 0.0035s.
2025-01-27 18:41:38,336	INFO tune.py:1041 -- Total run time: 14.71 seconds (14.66 seconds for the tuning loop).


In [20]:
from mlflow.entities import ViewType

# Resolve the experiment ID from its name instead of hard-coding it: IDs are
# assigned by the tracking server and differ between deployments, so a literal
# ID can silently point at the wrong experiment.
experiment = mlflow.get_experiment_by_name(EXP_NAME)
if experiment is None:
    raise RuntimeError(f"MLflow experiment {EXP_NAME!r} not found at {TRACKING_URI}")
experiment_id = experiment.experiment_id

# All non-deleted runs of the experiment, returned as a pandas DataFrame.
runs = mlflow.search_runs(
    experiment_ids=[experiment_id], run_view_type=ViewType.ACTIVE_ONLY
)

In [21]:
# Pick the run with the highest logged accuracy and build the URI of the model
# artifact it stored under "xgb_models".
best_row_label = runs["metrics.accuracy"].idxmax()
best_run = runs.loc[best_row_label]
best_run_id = best_run["run_id"]
model_uri = "runs:/{}/xgb_models".format(best_run_id)

In [22]:
# Register the best run's model; `result.version` is used when setting the alias.
result = mlflow.register_model(model_uri=model_uri, name=MODEL_NAME)

Registered model 'xgb-breast-cancer-classifer' already exists. Creating a new version of this model...
2025/01/27 18:42:02 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: xgb-breast-cancer-classifer, version 2
Created version '2' of model 'xgb-breast-cancer-classifer'.


In [23]:
# Point the alias at the version that was just registered.
client.set_registered_model_alias(
    name=MODEL_NAME,
    alias=ALIAS,
    version=result.version,
)

In [24]:
# Load the model through its registry alias rather than a fixed version number.
champion_uri = f"models:/{MODEL_NAME}@{ALIAS}"
champion_version = mlflow.pyfunc.load_model(champion_uri)

In [25]:
# Rebuild a 20% hold-out slice of the dataset to exercise the loaded model.
# The seed is fixed so the predictions shown below are reproducible on rerun.
# NOTE(review): unless training used the same seed and test_size, some of these
# rows were part of the champion's training data -- treat this as a smoke test
# of the registered model, not as an unbiased evaluation.
data, labels = sklearn.datasets.load_breast_cancer(return_X_y=True)
_, test_x, _, test_y = train_test_split(
    data, labels, test_size=0.2, random_state=42
)


champion_version.predict(test_x)

array([0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1,
       1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0,
       0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0,
       1, 1, 1, 1])