In [1]:
import emoji

import mlflow
from mlflow import MlflowClient
from mlflow.artifacts import download_artifacts
from mlflow.entities import ViewType
from mlflow.models import infer_signature

import sklearn.datasets
from sklearn.metrics import accuracy_score, recall_score
from sklearn.model_selection import train_test_split

import xgboost as xgb

import ray
from ray import train, tune
from ray.air.integrations.mlflow import setup_mlflow

In [2]:
# MLflow tracking server used by this notebook and by every Ray trial.
TRACKING_URI = "http://0.0.0.0:5000"
# Experiment under which the tuning runs are recorded.
EXP_NAME = "ml_platform"
# Registered-model name. NOTE(review): "classifer" looks like a typo of "classifier",
# but the registry output shows a model with this exact name already exists, so
# renaming would start a new registered model — confirm before changing.
MODEL_NAME = "iris-classifer"
# Alias assigned to the model version registered by this notebook.
ALIAS = "champion"
# Accuracy the best run must reach before it is registered.
MINIMUM_REQUIRED_ACCURACY = 0.95

In [3]:
# Point the MLflow library at the tracking server and select the experiment.
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(experiment_name=EXP_NAME)

# Client used further down to set the registered-model alias.
client = MlflowClient()

In [5]:
# Start a local Ray instance limited to 6 CPUs. ignore_reinit_error makes this cell
# safe to re-run in a live kernel instead of failing because Ray is already running.
ray.init(num_cpus=6, ignore_reinit_error=True)

2025-01-30 19:35:14,531	INFO worker.py:1841 -- Started a local Ray instance.


0,1
Python version:,3.9.1
Ray version:,2.41.0


In [6]:
# Conda environment specification passed to mlflow.xgboost.log_model for every
# logged model; the pip section pins the training-time library versions.
_PINNED_PIP_PACKAGES = ["xgboost==2.1.3", "scikit-learn==1.5.2"]

conda_env = dict(
    name="mlflow-env",
    channels=["conda-forge"],
    dependencies=["python=3.9.1", "pip<=24.0", {"pip": _PINNED_PIP_PACKAGES}],
)


def train_function_mlflow(config: dict) -> None:
    """Train and evaluate one XGBoost classifier on iris for a single Ray Tune trial.

    The trial's hyperparameters are forwarded unchanged to ``xgb.XGBClassifier``.
    Accuracy and recall on the held-out split are logged to MLflow together with
    the fitted model, and the same two metrics are reported back to Ray Tune.

    Args:
        config: Hyperparameters for this trial, as supplied by Ray Tune.
    """
    # Fixed seed: every trial is scored on the same held-out rows, so differences in
    # accuracy come from the hyperparameters rather than from the random split.
    split_seed = 42

    setup_mlflow(
        config,
        experiment_name=EXP_NAME,
        tracking_uri=TRACKING_URI,
    )

    # Load dataset; stratify so the 20% test split keeps the class proportions.
    iris = sklearn.datasets.load_iris(as_frame=True)
    train_x, test_x, train_y, test_y = train_test_split(
        iris.data,
        iris.target,
        test_size=0.2,
        random_state=split_seed,
        stratify=iris.target,
    )

    # Pass the trial's config (hyperparameters for the xgb classifier)
    model = xgb.XGBClassifier(**config)
    model.fit(train_x, train_y)

    predictions = model.predict(test_x)
    # Measure accuracy and recall for this trial.
    # NOTE(review): micro-averaged recall came out identical to accuracy for every
    # trial in the results table — confirm this second metric is wanted.
    accuracy = accuracy_score(test_y, predictions)
    recall = recall_score(test_y, predictions, average="micro")

    # Input/output schema stored alongside the model, inferred from the training data.
    signature = infer_signature(train_x, model.predict(train_x))

    # Log the metrics as well as artifacts for this trial
    mlflow.log_metrics({"recall": recall, "accuracy": accuracy})
    mlflow.xgboost.log_model(
        model,
        "iris_xgb",
        conda_env=conda_env,
        signature=signature,
        model_format="json",
    )
    # Get the best result later based on the following metrics
    train.report({"accuracy": accuracy, "recall": recall})


def tune_with_setup() -> tune.ResultGrid:
    """Run the hyperparameter search over ``train_function_mlflow``.

    Samples 20 trials under the Ray run name "mlflow", giving each trial 2 CPUs.

    Returns:
        The result grid produced by ``Tuner.fit()``.
    """
    # Fixed settings plus the sampled ranges handed to each trial as its config.
    # NOTE(review): "logloss"/"error" look like binary-classification metric names and
    # fit() is called without an eval_set — confirm whether eval_metric has any effect.
    search_space = {
        "objective": "multi:softmax",
        "eval_metric": ["logloss", "error"],
        "max_depth": tune.randint(1, 9),
        "min_child_weight": tune.choice([1, 2, 3]),
        "subsample": tune.uniform(0.5, 1.0),
        "eta": tune.loguniform(1e-4, 1e-1),
    }

    # Each trial uses 2 CPUs. Therefore, at most 3 trials run concurrently.
    per_trial_trainable = tune.with_resources(train_function_mlflow, {"cpu": 2})

    tuner = tune.Tuner(
        per_trial_trainable,
        tune_config=tune.TuneConfig(num_samples=20),
        run_config=train.RunConfig(name="mlflow"),
        param_space=search_space,
    )
    return tuner.fit()

In [23]:
# Launch the search; `results` holds the result grid queried in the following cells.
results = tune_with_setup()

0,1
Current time:,2025-01-30 19:36:18
Running for:,00:00:09.81
Memory:,10.1/62.5 GiB

Trial name,status,loc,eta,max_depth,min_child_weight,subsample,iter,total time (s),accuracy,recall
train_function_mlflow_128f7_00000,TERMINATED,10.16.28.149:1309898,0.00010847,4,2,0.783788,1,0.95875,0.966667,0.966667
train_function_mlflow_128f7_00001,TERMINATED,10.16.28.149:1309896,0.000941725,1,1,0.84055,1,1.03581,0.9,0.9
train_function_mlflow_128f7_00002,TERMINATED,10.16.28.149:1309897,0.0112783,4,3,0.625216,1,1.04158,1.0,1.0
train_function_mlflow_128f7_00003,TERMINATED,10.16.28.149:1309887,0.0400486,5,3,0.899396,1,1.10258,0.9,0.9
train_function_mlflow_128f7_00004,TERMINATED,10.16.28.149:1309890,0.00138294,5,1,0.555241,1,0.929015,0.966667,0.966667
train_function_mlflow_128f7_00005,TERMINATED,10.16.28.149:1309876,0.000323095,4,1,0.502694,1,0.961324,0.933333,0.933333
train_function_mlflow_128f7_00006,TERMINATED,10.16.28.149:1309888,0.0235558,8,1,0.726899,1,1.53301,0.933333,0.933333
train_function_mlflow_128f7_00007,TERMINATED,10.16.28.149:1309875,0.00011474,8,3,0.50107,1,1.02697,0.933333,0.933333
train_function_mlflow_128f7_00008,TERMINATED,10.16.28.149:1309899,0.00200205,3,3,0.685448,1,1.00256,0.933333,0.933333
train_function_mlflow_128f7_00009,TERMINATED,10.16.28.149:1309889,0.0011327,7,2,0.813342,1,1.81465,0.966667,0.966667


2025-01-30 19:36:18,755	INFO tune.py:1009 -- Wrote the latest version of all result files and experiment state to '/home/ssafarveisi/ray_results/mlflow' in 0.0173s.
2025-01-30 19:36:18,760	INFO tune.py:1041 -- Total run time: 9.84 seconds (9.80 seconds for the tuning loop).


In [8]:
# Select the trial with the highest reported accuracy.
best_result = results.get_best_result(metric="accuracy", mode="max")

In [9]:
# Hyperparameters of the best trial.
best_result.config

{'objective': 'multi:softmax',
 'eval_metric': ['logloss', 'error'],
 'max_depth': 1,
 'min_child_weight': 1,
 'subsample': 0.5121816085593194,
 'eta': 0.05128295544527588}

In [10]:
# Metrics Ray recorded for the best trial.
best_result.metrics_dataframe

Unnamed: 0,accuracy,recall,timestamp,checkpoint_dir_name,done,training_iteration,trial_id,date,time_this_iter_s,time_total_s,...,hostname,node_ip,time_since_restore,iterations_since_restore,config/objective,config/eval_metric,config/max_depth,config/min_child_weight,config/subsample,config/eta
0,1.0,1.0,1738262117,,False,1,f26a2_00000,2025-01-30_19-35-17,0.810883,0.810883,...,LXKA-J9SYDX3,10.16.28.149,0.810883,1,multi:softmax,"[logloss, error]",1,1,0.512182,0.051283


In [8]:
# Look the experiment up explicitly so a missing experiment produces a clear error
# instead of an AttributeError on None.
experiment = mlflow.get_experiment_by_name(name=EXP_NAME)
if experiment is None:
    raise ValueError(
        f"MLflow experiment {EXP_NAME!r} does not exist on the tracking server"
    )
experiment_id = experiment.experiment_id

# Fetch the experiment's active runs (ViewType comes from the top import cell).
runs = mlflow.search_runs(
    experiment_ids=[experiment_id], run_view_type=ViewType.ACTIVE_ONLY
)

In [9]:
# Pick the run with the highest logged accuracy. Fail with a clear message when no
# run has logged that metric, rather than with a bare KeyError on the column lookup.
if "metrics.accuracy" not in runs.columns:
    raise ValueError(
        "No run with a logged 'accuracy' metric was found; run the tuning cell first."
    )

best_run = runs.loc[runs["metrics.accuracy"].idxmax()]
best_run_id = best_run.run_id
best_run_accuracy = best_run["metrics.accuracy"]
# URI of the model artifact logged under "iris_xgb" by the best run.
model_uri = f"runs:/{best_run_id}/iris_xgb"

In [13]:
# Gate registration on the accuracy threshold: report the outcome first, then
# register the best run's model only when the threshold was met.
passed_gate = best_run_accuracy >= MINIMUM_REQUIRED_ACCURACY
if passed_gate:
    status_message = "Model accuracy met the required minimum accuracy :fire:"
else:
    status_message = (
        "Best run did not meet the required minimum accuracy :sad_but_relieved_face:"
    )
print(emoji.emojize(status_message))

if passed_gate:
    result = mlflow.register_model(model_uri, MODEL_NAME)

Registered model 'iris-classifer' already exists. Creating a new version of this model...
2025/01/30 19:35:42 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: iris-classifer, version 3


Model accuracy met the required minimum accuracy 🔥


Created version '3' of model 'iris-classifer'.


In [None]:
# Move the alias only when the best run passed the accuracy gate. `result` is assigned
# solely in the passing branch of the registration cell, so an unconditional call would
# raise NameError (or reuse a stale version from an earlier execution) after a failed gate.
if best_run_accuracy >= MINIMUM_REQUIRED_ACCURACY:
    client.set_registered_model_alias(MODEL_NAME, ALIAS, result.version)

In [15]:
# Resolve the registered model through its alias and load it as a pyfunc model.
champion_uri = f"models:/{MODEL_NAME}@{ALIAS}"
champion_version = mlflow.pyfunc.load_model(champion_uri)

In [16]:
# Draw a fresh 20% sample of iris and run the champion model on it as a smoke test.
# NOTE(review): this split is unseeded and separate from the split made inside each
# trial, so these rows may overlap with data the model was trained on — it shows that
# inference works, not how accurate the model is.
iris = sklearn.datasets.load_iris(as_frame=True)
features, labels = iris.data, iris.target
_, test_x, _, test_y = train_test_split(features, labels, test_size=0.2)

champion_version.predict(test_x)

array([2, 1, 1, 0, 2, 2, 1, 1, 2, 1, 2, 1, 2, 0, 1, 2, 0, 0, 0, 2, 2, 2,
       0, 0, 0, 2, 2, 1, 1, 1], dtype=int32)

In [10]:
# Add extra dependencies to the requirements stored with the best run's model.
# NOTE(review): "kserve[ray]" is unpinned while boto3 is pinned — confirm that is intended.
extra_requirements = ["boto3==1.35.99", "kserve[ray]"]

mlflow.models.update_model_requirements(
    model_uri=model_uri,
    operation="add",
    requirement_list=extra_requirements,
)

2025/01/30 22:46:07 INFO mlflow.models.model: Retrieving model requirements files from mlflow-artifacts:/1/09588fddc5894a8db58b273d738097b6/artifacts/iris_xgb...
2025/01/30 22:46:08 INFO mlflow.models.model: Done updating requirements!

Old requirements:
['mlflow==2.20.0', 'xgboost==2.1.3', 'scikit-learn==1.5.2']

Updated requirements:
['mlflow==2.20.0',
 'xgboost==2.1.3',
 'scikit-learn==1.5.2',
 'boto3==1.35.99',
 'kserve[ray]']

2025/01/30 22:46:08 INFO mlflow.models.model: Uploading updated requirements files to mlflow-artifacts:/1/09588fddc5894a8db58b273d738097b6/artifacts/iris_xgb...


In [18]:
# Validate the model before deployment: predict on test_x with the best run's model,
# letting MLflow build the environment with uv (see this cell's log output).
mlflow.models.predict(
    model_uri=model_uri,
    input_data=test_x,
    env_manager="uv",
    install_mlflow=False,
)

2025/01/30 19:35:44 INFO mlflow.models.flavor_backend_registry: Selected backend for flavor 'python_function'
2025/01/30 19:35:44 INFO mlflow.utils.virtualenv: Creating a new environment in /tmp/tmpmeptnwpf/envs/virtualenv_envs/mlflow-c72e38679939a94e754a658ea14e5f9e791f91ee with python version 3.9.1 using uv
Using CPython [36m3.9.1[39m
Creating virtual environment at: [36m/tmp/tmpmeptnwpf/envs/virtualenv_envs/mlflow-c72e38679939a94e754a658ea14e5f9e791f91ee[39m
Activate with: [32msource /tmp/tmpmeptnwpf/envs/virtualenv_envs/mlflow-c72e38679939a94e754a658ea14e5f9e791f91ee/bin/activate[39m
2025/01/30 19:35:44 INFO mlflow.utils.virtualenv: Installing dependencies
[2mUsing Python 3.9.1 environment at: /tmp/tmpmeptnwpf/envs/virtualenv_envs/mlflow-c72e38679939a94e754a658ea14e5f9e791f91ee[0m
[2mResolved [1m3 packages[0m [2min 25ms[0m[0m
[2mInstalled [1m3 packages[0m [2min 15ms[0m[0m
 [32m+[39m [1mpip[0m[2m==24.2[0m
 [32m+[39m [1msetuptools[0m[2m==74.1.2[0m
 [3

{"predictions": [2, 1, 1, 0, 2, 2, 1, 1, 2, 1, 2, 1, 2, 0, 1, 2, 0, 0, 0, 2, 2, 2, 0, 0, 0, 2, 2, 1, 1, 1]}

In [11]:
from mlflow.artifacts import download_artifacts

In [12]:
# S3 location of the best run's "iris_xgb" artifacts.
# NOTE(review): bucket and prefix are hard-coded; presumably they mirror the tracking
# server's artifact root — confirm.
s3_path = f"s3://customerintelligence/ml_platform/mlartifacts/{experiment_id}/{best_run_id}/artifacts/iris_xgb/"
print(s3_path)

s3://customerintelligence/ml_platform/mlartifacts/1/09588fddc5894a8db58b273d738097b6/artifacts/iris_xgb/


In [13]:
# Download the model's artifacts (e.g., requirements.txt) into ./best_model_artifacts
download_artifacts(
    artifact_uri=s3_path + "requirements.txt", dst_path="./best_model_artifacts"
)

'/home/ssafarveisi/Desktop/Projects/K8sApp/ml_platform/best_model_artifacts/requirements.txt'

In [14]:
# Shut down the Ray instance started by ray.init earlier in the notebook.
ray.shutdown()