In [1]:
# Notebook-wide imports: NumPy, scikit-learn data/model/metric helpers, and XGBoost.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import classification_report
import warnings
# NOTE(review): this silences every warning for the rest of the session, including
# any deprecation warnings raised by the libraries above — consider narrowing the
# filter to the specific categories/modules that are known to be noisy.
warnings.filterwarnings('ignore')

In [2]:
# Step 1: Build a synthetic, imbalanced binary classification problem.
# `weights` requests a 90% / 10% class split and `flip_y=0` turns off random
# label flipping, so the class counts come out exactly as requested.
X, y = make_classification(
    n_samples=1000,
    n_features=10,
    n_informative=2,
    n_redundant=8,
    weights=[0.9, 0.1],
    flip_y=0,
    random_state=42,
)

# Per-class sample counts, displayed as the cell's output.
np.unique(y, return_counts=True)

(array([0, 1]), array([900, 100], dtype=int64))

In [3]:
# Hold out 30% of the samples for testing. `stratify=y` keeps the class ratio
# the same in the training and test splits; the seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)

In [20]:
# NOTE(review): mid-notebook import — consider moving it to the import cell at
# the top so the notebook's dependencies are visible in one place.
from imblearn.combine import SMOTETomek
# Resample the training split only; X_test / y_test are not passed to the
# resampler, so the held-out data keeps its original class distribution.
smt = SMOTETomek(random_state=42)
X_train_res, y_train_res = smt.fit_resample(X_train, y_train)
# Print the per-class counts after resampling.
print(np.unique(y_train_res, return_counts=True))

(array([0, 1]), array([618, 618], dtype=int64))


In [21]:
# --- Train & log runs ---
# Fits every configured model, evaluates it on its test split, and records
# params, metrics and the fitted model as one MLflow run per model.
import mlflow
import mlflow.sklearn
import mlflow.xgboost
import sklearn
import xgboost

mlflow.set_tracking_uri("http://localhost:5000")
mlflow.set_experiment("Anomaly Detection")

# NOTE(review): `models` is not defined in any cell visible here (execution
# counts jump from In [3] to In [20]) — confirm its defining cell is still in
# the notebook so Restart & Run All works. From the unpacking in the loop, each
# entry is expected to be (name, params, model, (X_train, y_train), (X_test, y_test)).

# Explicit requirement pins are passed to log_model instead of letting MLflow
# infer the environment (faster). Built once, outside the loop.
base_reqs = [
    f"mlflow=={mlflow.__version__}",
    f"scikit-learn=={sklearn.__version__}",
    "cloudpickle",
]
# Models saved with the xgboost flavor need xgboost itself in the recorded
# environment; with an explicit pip_requirements list it is not added for us.
xgb_reqs = base_reqs + [f"xgboost=={xgboost.__version__}"]

reports = []    # classification_report dict per model, in `models` order
runs_info = {}  # store run_id per model for later

for name, params, model, train_set, test_set in models:
    Xtr, ytr = train_set
    Xte, yte = test_set

    # Apply this run's hyper-parameters, fit, and evaluate on the test split.
    model.set_params(**params)
    model.fit(Xtr, ytr)
    y_pred = model.predict(Xte)
    report = classification_report(yte, y_pred, output_dict=True)
    reports.append(report)

    # Choose the MLflow flavor from the model's type rather than from a
    # substring of its display name, so renaming a run cannot change the flavor.
    is_xgb = isinstance(model, xgboost.XGBModel)
    log_model = mlflow.xgboost.log_model if is_xgb else mlflow.sklearn.log_model
    reqs = xgb_reqs if is_xgb else base_reqs

    with mlflow.start_run(run_name=name) as run:
        mlflow.log_params(params)
        mlflow.log_metrics({
            "accuracy": report["accuracy"],
            "recall_class_1": report["1"]["recall"],
            "recall_class_0": report["0"]["recall"],
            "f1_score_macro": report["macro avg"]["f1-score"],
        })

        # The model is only logged as a run artifact here (no
        # registered_model_name), so nothing is added to the registry yet.
        log_model(model, artifact_path="model", pip_requirements=reqs)

        runs_info[name] = run.info.run_id




🏃 View run Logistic Regression at: http://localhost:5000/#/experiments/3/runs/51d17bba829944de972f5ef0bb6d5b99
🧪 View experiment at: http://localhost:5000/#/experiments/3




🏃 View run Random Forest at: http://localhost:5000/#/experiments/3/runs/aebdef871b32427d8b651a01b46a6c60
🧪 View experiment at: http://localhost:5000/#/experiments/3




🏃 View run XGBClassifier at: http://localhost:5000/#/experiments/3/runs/82a623c08470404481660179be1cbdec
🧪 View experiment at: http://localhost:5000/#/experiments/3




🏃 View run XGBClassifier With SMOTE at: http://localhost:5000/#/experiments/3/runs/1ad97453b0764dc7a4ada7adde3d651c
🧪 View experiment at: http://localhost:5000/#/experiments/3


In [22]:
# Register the SMOTE-trained XGBoost run's model in the MLflow Model Registry.
# NOTE(review): mid-notebook import — consider moving it to the top import cell.
from mlflow.tracking import MlflowClient
client = MlflowClient()

# Look up the run id recorded by the training loop under this exact run name.
smote_run_id = runs_info["XGBClassifier With SMOTE"]
model_name = "XGB-Smote"
model_uri = f"runs:/{smote_run_id}/model"  # must match artifact_path

# Creates the registered model or, if the name already exists, a new version of it.
mv = mlflow.register_model(model_uri=model_uri, name=model_name)
version = mv.version
print("Registered:", model_name, "version:", version)

Registered model 'XGB-Smote' already exists. Creating a new version of this model...
2025/08/11 22:33:01 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: XGB-Smote, version 1


Registered: XGB-Smote version: 1


Created version '1' of model 'XGB-Smote'.


In [23]:
# Point the 'challenger' alias at the version that was just registered.
client.set_registered_model_alias(name=model_name, alias="challenger", version=version)

# Promote by copying the challenger version into a separate production model name.
# Aliases are used for promotion here; model version stages are deprecated in
# recent MLflow releases, so they are not a recommended alternative.
prod_name = "anomaly-detection-prod"
copied = client.copy_model_version(src_model_uri=f"models:/{model_name}@challenger",
                                   dst_name=prod_name)
prod_version = copied.version

# Give the prod version an alias 'champion' so consumers can load it without
# hard-coding a version number.
client.set_registered_model_alias(name=prod_name, alias="champion", version=prod_version)


Successfully registered model 'anomaly-detection-prod'.
Copied version '1' of model 'XGB-Smote' to version '1' of model 'anomaly-detection-prod'.


In [24]:
# --- Load by alias and score ---
# Resolve whichever version currently carries the 'champion' alias on the
# production model, load it with the xgboost flavor, and show the first four
# predictions on the held-out test split.
champion_uri = f"models:/{prod_name}@champion"
loaded = mlflow.xgboost.load_model(champion_uri)
print(loaded.predict(X_test)[:4])


[0 0 0 0]
