In [1]:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import classification_report
import warnings
# Silence every warning for the rest of the session to keep cell output short.
# Note that this also hides deprecation notices from the libraries used below.
warnings.filterwarnings('ignore')

In [2]:
# Synthetic binary classification data: 1000 samples with 10 features
# (2 informative, 8 redundant), a 90/10 class weighting and flip_y=0.
X, y = make_classification(
    n_samples=1000,
    n_features=10,
    n_informative=2,
    n_redundant=8,
    weights=[0.9, 0.1],
    flip_y=0,
    random_state=42,
)

# Show the per-class sample counts to confirm the imbalance.
np.unique(y, return_counts=True)

(array([0, 1]), array([900, 100], dtype=int64))

In [3]:
# Hold out 30% of the data for evaluation; stratifying on y keeps the class
# ratio the same in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)

### Handle class imbalance

In [4]:
from imblearn.combine import SMOTETomek

# Resample the training split only; X_test / y_test are not touched here.
smt = SMOTETomek(random_state=42)
X_train_res, y_train_res = smt.fit_resample(X_train, y_train)
# Show the per-class counts after resampling.
np.unique(y_train_res, return_counts=True)

(array([0, 1]), array([619, 619], dtype=int64))

### Track Experiments

In [5]:
# Experiment definitions. Each entry is a 5-tuple that later cells consume
# positionally:
#   (run name, hyper-parameters, unfitted estimator, (X, y) to fit on, (X, y) to evaluate on)
# The hyper-parameter dict is applied with `set_params` before fitting and is
# also what gets logged as the run's parameters, so every setting that affects
# the result (including seeds) belongs in it.
models = [
    (
        "Logistic Regression",
        {"C": 1, "solver": 'liblinear'},
        LogisticRegression(),
        (X_train, y_train),
        (X_test, y_test)
    ),
    (
        "Random Forest",
        # random_state pins the forest's internal randomness so the reported
        # metrics are reproducible across kernel restarts, like every other
        # seeded step in this notebook.
        {"n_estimators": 30, "max_depth": 3, "random_state": 42},
        RandomForestClassifier(),
        (X_train, y_train),
        (X_test, y_test)
    ),
    (
        "XGBClassifier",
        {"use_label_encoder": False, "eval_metric": 'logloss'},
        XGBClassifier(),
        (X_train, y_train),
        (X_test, y_test)
    ),
    (
        # Same estimator settings as above, but fitted on the resampled
        # training data; evaluation still uses the untouched test split.
        "XGBClassifier With SMOTE",
        {"use_label_encoder": False, "eval_metric": 'logloss'},
        XGBClassifier(),
        (X_train_res, y_train_res),
        (X_test, y_test)
    )
]

In [6]:
# Fit every configured experiment and collect its classification report.
# reports[i] is the report for models[i]; the logging cell relies on that order.
reports = []

for model_name, params, model, train_set, test_set in models:
    # Unpack into loop-local names. Rebinding the notebook-level
    # X_train / y_train / X_test / y_test here would leave them pointing at the
    # last experiment's (resampled) data after the loop, so re-running an
    # earlier cell would silently train every model on the wrong split.
    X_fit, y_fit = train_set
    X_eval, y_eval = test_set

    # Apply the experiment's hyper-parameters to the still-unfitted estimator.
    model.set_params(**params)
    model.fit(X_fit, y_fit)

    y_pred = model.predict(X_eval)
    # output_dict=True returns a nested dict instead of a formatted string,
    # which is what the metric extraction in the logging cell indexes into.
    report = classification_report(y_eval, y_pred, output_dict=True)
    reports.append(report)

In [7]:
# Inspect the report of the first experiment in `models` (Logistic Regression).
reports[0]

{'0': {'precision': 0.9454545454545454,
  'recall': 0.9629629629629629,
  'f1-score': 0.9541284403669725,
  'support': 270.0},
 '1': {'precision': 0.6,
  'recall': 0.5,
  'f1-score': 0.5454545454545454,
  'support': 30.0},
 'accuracy': 0.9166666666666666,
 'macro avg': {'precision': 0.7727272727272727,
  'recall': 0.7314814814814814,
  'f1-score': 0.749791492910759,
  'support': 300.0},
 'weighted avg': {'precision': 0.9109090909090909,
  'recall': 0.9166666666666666,
  'f1-score': 0.91326105087573,
  'support': 300.0}}

In [8]:
import mlflow
import sklearn
import xgboost

In [9]:
# Point the client at the tracking server BEFORE selecting the experiment:
# set_experiment looks up / creates the experiment in whichever store is active
# at call time, so the reverse order resolves it against the default store
# instead of the server the runs are meant to go to.
mlflow.set_tracking_uri('http://127.0.0.1:5000/')
mlflow.set_experiment('Anomaly Detection')

# The reports were produced in the same order as `models`; refuse to log if the
# two lists are out of step rather than attaching metrics to the wrong run.
if len(reports) != len(models):
    raise ValueError(
        f"Expected one report per model, got {len(reports)} reports for {len(models)} models"
    )

for (name, params, model, _train_set, _test_set), report in zip(models, reports):
    # Headline metrics pulled from the classification_report dict.
    metrics = {
        'accuracy': report['accuracy'],
        'recall_class_0': report['0']['recall'],
        'recall_class_1': report['1']['recall'],
        'f1_score_macro': report['macro avg']['f1-score'],
        'f1_score_weighted': report['weighted avg']['f1-score'],
    }

    # One MLflow run per experiment, named after the experiment entry.
    with mlflow.start_run(run_name=name):

        mlflow.log_metrics(metrics)

        mlflow.log_params(params)

        # Choose the MLflow flavor from the estimator's type rather than from
        # the human-readable run name, so renaming a run cannot change how the
        # model is serialized.
        if isinstance(model, XGBClassifier):
            mlflow.xgboost.log_model(model, 'model')
        else:
            mlflow.sklearn.log_model(model, 'model')

2025/06/25 15:45:26 INFO mlflow.tracking.fluent: Experiment with name 'Anomaly Detection' does not exist. Creating a new experiment.


### Register the Model

In [23]:
model_name = "XGB-Smote"

# Find the run to register by experiment and run name instead of prompting for
# a pasted run id: the cell then works under Restart & Run All and always
# registers the run this notebook produced. If the logging cell was executed
# several times, the most recently started matching run is used.
matching_runs = mlflow.search_runs(
    experiment_names=['Anomaly Detection'],
    filter_string="tags.mlflow.runName = 'XGBClassifier With SMOTE'",
    order_by=["attributes.start_time DESC"],
    max_results=1,
    output_format="list",
)
if not matching_runs:
    raise RuntimeError(
        "No run named 'XGBClassifier With SMOTE' found in experiment "
        "'Anomaly Detection'; run the logging cell first."
    )
run_id = matching_runs[0].info.run_id

# 'model' is the artifact path the logging cell stored the model under.
model_uri = f"runs:/{run_id}/model"
mlflow.register_model(model_uri, model_name)

Registered model 'XGB-Smote' already exists. Creating a new version of this model...
2025/06/25 16:00:14 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: XGB-Smote, version 2
Created version '2' of model 'XGB-Smote'.


<ModelVersion: aliases=[], creation_timestamp=1750847414630, current_stage='None', description='', last_updated_timestamp=1750847414630, name='XGB-Smote', run_id='69ff8faa8f814cafb7f375054115d3a4', run_link='', source='file:///D:/mlflow/mlruns/366931837630879397/69ff8faa8f814cafb7f375054115d3a4/artifacts/model', status='READY', status_message='', tags={}, user_id='', version='2'>

### Load the Model

In [24]:
# Load one specific version of the registered model back from the registry and
# run it on the held-out test features.
# NOTE(review): the version is hard-coded; each execution of the registration
# cell creates a new version, so confirm this number is the one intended.
model_version = 2
model_name = "XGB-Smote"
model_uri = f"models:/{model_name}/{model_version}"
loaded_model = mlflow.xgboost.load_model(model_uri)
y_pred = loaded_model.predict(X_test)
# Show the first five predictions as a quick sanity check.
y_pred[:5]

array([0, 0, 0, 0, 0])

### Transition the Model to Production

In [27]:
from mlflow import MlflowClient

client = MlflowClient()
prod_model_name = 'anomaly-detection-prod'

# The copy below resolves its source through the "challenger" alias, but no
# earlier cell in this notebook assigns that alias, so on a fresh registry the
# lookup fails. Assign it here to the version loaded and checked in the
# previous cell (`model_name` / `model_version`), which makes this step
# reproducible from code alone.
client.set_registered_model_alias(model_name, "challenger", str(model_version))

current_model_uri = f"models:/{model_name}@challenger"

# Copy the challenger version into the production registered model; the
# returned ModelVersion is displayed as the cell output.
client.copy_model_version(src_model_uri=current_model_uri, dst_name=prod_model_name)

<ModelVersion: aliases=[], creation_timestamp=1750847770825, current_stage='None', description='', last_updated_timestamp=1750847770825, name='anomaly-detection-prod', run_id='69ff8faa8f814cafb7f375054115d3a4', run_link='', source='models:/XGB-Smote/2', status='READY', status_message='', tags={}, user_id='', version='1'>

In [29]:
# Load the production model through its "champion" alias and run it on the
# held-out test features.
# NOTE(review): nothing in this notebook assigns the "champion" alias;
# presumably it is set outside the notebook (e.g. in the MLflow UI) after the
# copy step. Confirm, otherwise this load fails on a fresh registry.
# Note that `model_name` is rebound here from the source model to the
# production model.
model_name = 'anomaly-detection-prod'
model_uri = f"models:/{model_name}@champion"
loaded_model = mlflow.xgboost.load_model(model_uri)
y_pred = loaded_model.predict(X_test)
# Show the first five predictions as a quick sanity check.
y_pred[:5]

array([0, 0, 0, 0, 0])