In [30]:
import os

import tqdm

# Directory the notebook should run from. The original absolute path is kept as
# the default so existing setups behave the same; set the MLFLOW_DEMO_DIR
# environment variable to run the notebook from a different checkout/machine.
# NOTE(review): presumably the chdir is there so that MLflow's local tracking
# data is created inside the project folder — confirm.
PROJECT_DIR = os.environ.get(
    "MLFLOW_DEMO_DIR",
    '/root/projects/PythonProjects/learning-machine-learning-models/tools/mlflow',
)

# Show the working directory before and after the switch so the reader can
# verify where subsequent relative paths will resolve.
display(os.getcwd())
os.chdir(PROJECT_DIR)
display(os.getcwd())

'/root/projects/PythonProjects/learning-machine-learning-models/tools/mlflow'

'/root/projects/PythonProjects/learning-machine-learning-models/tools/mlflow'

In [48]:
import mlflow

from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# Single seed shared by the data split and the classifiers so that a
# Restart & Run All reproduces the same metrics.
SEED = 123
# Tree depths to compare; one MLflow run is logged per depth.
MAX_DEPTHS = [1, 2, 5, 10, 20]

iris = load_iris()
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SEED
)

EXPERIMENT_NAME = "mlflow-demo"
# Create the experiment on first use, otherwise reuse the existing one.
# EXPERIMENT_ID must be assigned on BOTH paths: previously it was only set when
# the experiment was created, so a fresh kernel with an already-existing
# experiment failed with NameError at mlflow.start_run below.
experiment = mlflow.get_experiment_by_name(EXPERIMENT_NAME)
if experiment is None:
    EXPERIMENT_ID = mlflow.create_experiment(EXPERIMENT_NAME)
else:
    EXPERIMENT_ID = experiment.experiment_id

# tqdm wraps the list itself so the progress-bar total is derived from its
# length instead of a hard-coded number. `tqdm` is imported in the setup cell.
for idx, depth in enumerate(tqdm.tqdm(MAX_DEPTHS)):
    # random_state pins the tree's internal tie-breaking between equally good
    # splits; without it, identical depths can yield different scores per run.
    clf = DecisionTreeClassifier(max_depth=depth, random_state=SEED)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    y_pred_prob = clf.predict_proba(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    # Multiclass AUC (one-vs-rest): y_true is the 1-D label vector and y_score
    # the per-class probability matrix, as roc_auc_score documents.
    auc_score = roc_auc_score(y_test, y_pred_prob, multi_class='ovr')

    # Start MLflow
    RUN_NAME = f"run_{idx}"
    with mlflow.start_run(experiment_id=EXPERIMENT_ID, run_name=RUN_NAME) as run:
        # Retrieve run id (after the loop this holds the id of the last run)
        RUN_ID = run.info.run_id

        # Track parameters
        mlflow.log_param("depth", depth)

        # Track metrics (names are read back verbatim by the analysis cell)
        mlflow.log_metric("accuracy", accuracy)
        mlflow.log_metric('ROC AUC score', auc_score)

        # Track model under the artifact path "classifier"
        mlflow.sklearn.log_model(clf, "classifier")

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:07<00:00,  1.42s/it]


In [49]:
# Quick sanity check: show the experiment id, the most recently assigned run id,
# and the mlflow module object (its repr reveals which installation is in use).
(
    EXPERIMENT_ID,
    RUN_ID,
    mlflow,
)

('216735303135488445',
 'ea305031891a4a45802d37aeee1e8923',
 <module 'mlflow' from '/root/miniconda3/envs/devenv/lib/python3.9/site-packages/mlflow/__init__.py'>)

In [72]:
import mlflow
import pandas as pd

from mlflow.tracking import MlflowClient


EXPERIMENT_NAME = "mlflow-demo"

client = MlflowClient()

# Retrieve Experiment information
experiment = client.get_experiment_by_name(EXPERIMENT_NAME)
if experiment is None:
    # Fail with an actionable message instead of an AttributeError on None.
    raise ValueError(
        f"MLflow experiment {EXPERIMENT_NAME!r} not found; run the training cell first."
    )
EXPERIMENT_ID = experiment.experiment_id
print('EXPERIMENT_ID', EXPERIMENT_ID)

# Retrieve Runs information (parameter 'depth', metrics 'accuracy' / 'ROC AUC score').
# search_runs returns full Run objects (info + data), so params and metrics are
# read straight from the result instead of issuing extra get_run() calls per run.
# (The older client.list_run_infos API returned only run infos.)
ALL_RUNS_INFO = client.search_runs(experiment_ids=[EXPERIMENT_ID])

ALL_RUNS_ID = [run.info.run_id for run in ALL_RUNS_INFO]

# .get() leaves a missing value (NaN/None in the table) for runs that did not
# log a given key, e.g. a failed or unrelated run, rather than raising KeyError.
ALL_PARAM = [run.data.params.get("depth") for run in ALL_RUNS_INFO]
ALL_ACCURACY = [run.data.metrics.get("accuracy") for run in ALL_RUNS_INFO]
ALL_ROCAUC = [run.data.metrics.get("ROC AUC score") for run in ALL_RUNS_INFO]

# View Runs information
DF = pd.DataFrame({"Run ID": ALL_RUNS_ID, "Params": ALL_PARAM, "Accuracies": ALL_ACCURACY, 'ROC AUC Scores': ALL_ROCAUC})

# Retrieve Artifact from best run (highest ROC AUC; the first such row wins ties).
scored_runs = DF.dropna(subset=["ROC AUC Scores"])
if scored_runs.empty:
    raise ValueError(
        f"No runs with a 'ROC AUC score' metric found in experiment {EXPERIMENT_NAME!r}."
    )
best_run_id = scored_runs.loc[scored_runs["ROC AUC Scores"].idxmax(), "Run ID"]
best_model_path = mlflow.artifacts.download_artifacts(run_id=best_run_id, artifact_path='classifier')
best_model = mlflow.sklearn.load_model(best_model_path)

# # Delete runs (DO NOT USE UNLESS CERTAIN)
# for run_id in ALL_RUNS_ID:
#     client.delete_run(run_id)
#
# # Delete experiment (DO NOT USE UNLESS CERTAIN)
# client.delete_experiment(EXPERIMENT_ID)
print('Best run ID:', best_run_id)
DF

EXPERIMENT_ID 216735303135488445
Best run ID: ee05a605112a4caab7291cd8bf2f3470


Unnamed: 0,Run ID,Params,Accuracies,ROC AUC Scores
0,ea305031891a4a45802d37aeee1e8923,20,0.966667,0.977904
1,9792a49e8a5948adbb08005ad8574d3d,10,0.966667,0.977904
2,ee05a605112a4caab7291cd8bf2f3470,5,0.966667,1.0
3,70d99bd2d7534a179bbd92619e5c270a,2,0.966667,0.988271
4,912872f2a8e34657932ccb0df7aec357,1,0.633333,0.87098
