In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans, DBSCAN, AgglomerativeClustering
from sklearn.metrics import silhouette_score
import mlflow
import mlflow.sklearn
import warnings
# Suppress every Python warning for the rest of the session. No category or
# module filter is given, so this hides all warnings, not only noisy ones.
warnings.filterwarnings("ignore")

In [3]:
# Synthetic 2-D dataset: 1000 points drawn around 3 blob centers, seeded so
# reruns produce the same data. The generated ground-truth labels are discarded.
X, _ = make_blobs(
    n_samples=1000,
    centers=3,
    n_features=2,
    random_state=42,
)

In [5]:
# Candidate clusterers as (display name, unfitted estimator, data) triples.
# Every candidate is evaluated on the same matrix X, so it is attached once
# here rather than repeated in each entry.
models = [
    (label, estimator, X)
    for label, estimator in (
        ("KMeans (k=3)", KMeans(n_clusters=3, random_state=42)),
        ("KMeans (k=4)", KMeans(n_clusters=4, random_state=42)),
        ("DBSCAN (eps=0.3)", DBSCAN(eps=0.3, min_samples=5)),
        ("Agglomerative (n=4)", AgglomerativeClustering(n_clusters=4)),
    )
]


In [7]:
# Fit every candidate in `models` and collect one report dict per model:
# its display name, the fitted estimator, its silhouette score and its labels.
reports = []

for model_name, model, X_data in models:
    # fit_predict() fits the estimator and returns the training-set labels in
    # a single call. KMeans, DBSCAN and AgglomerativeClustering all provide
    # it, so no predict()/labels_ branching (and no second pass over the data
    # for KMeans) is needed.
    labels = model.fit_predict(X_data)

    # silhouette_score is only defined for 2 <= n_labels <= n_samples - 1 and
    # raises ValueError outside that range (e.g. when every sample ends up
    # with its own label), so both ends are guarded.
    # NOTE: DBSCAN marks noise points with label -1; that label is counted
    # here like any other cluster.
    n_labels = len(set(labels))
    if 2 <= n_labels <= len(labels) - 1:
        score = silhouette_score(X_data, labels)
    else:
        score = -1  # Sentinel: silhouette is undefined for this labelling

    reports.append({
        "model_name": model_name,
        "model": model,
        "score": score,
        "labels": labels
    })


In [9]:
# Send all runs to the tracking server at this URI, grouped under a single
# experiment.
mlflow.set_tracking_uri("http://localhost:5000")
mlflow.set_experiment("Clustering Experiment")

# One MLflow run per entry collected in `reports`.
for report in reports:
    model_name, model, score = (
        report[field] for field in ("model_name", "model", "score")
    )

    with mlflow.start_run(run_name=model_name):
        mlflow.log_param("model", model_name)
        mlflow.log_metric("silhouette_score", score)

        # The fitted estimator is stored as an artifact only for KMeans;
        # every other model type gets just the param and the metric.
        if not isinstance(model, KMeans):
            continue
        mlflow.sklearn.log_model(model, "model")


2025/05/10 14:40:58 INFO mlflow.tracking.fluent: Experiment with name 'Clustering Experiment' does not exist. Creating a new experiment.
The git executable must be specified in one of the following ways:
    - be included in your $PATH
    - be set via $GIT_PYTHON_GIT_EXECUTABLE
    - explicitly set via git.refresh(<full-path-to-git-executable>)

All git commands will error until this is rectified.

This initial message can be silenced or aggravated in the future by setting the
$GIT_PYTHON_REFRESH environment variable. Use one of the following values:
    - quiet|q|silence|s|silent|none|n|0: for no message or exception
    - error|e|exception|raise|r|2: for a raised exception

Example:
    export GIT_PYTHON_REFRESH=quiet



🏃 View run KMeans (k=3) at: http://localhost:5000/#/experiments/878009883715146363/runs/13c557ecb6e24ac88f6430ded0453585
🧪 View experiment at: http://localhost:5000/#/experiments/878009883715146363




🏃 View run KMeans (k=4) at: http://localhost:5000/#/experiments/878009883715146363/runs/ab1dd7f4e31d48b5963f5a34b59553e9
🧪 View experiment at: http://localhost:5000/#/experiments/878009883715146363
🏃 View run DBSCAN (eps=0.3) at: http://localhost:5000/#/experiments/878009883715146363/runs/1a90af3b7bd341b3a76875c1624cc108
🧪 View experiment at: http://localhost:5000/#/experiments/878009883715146363
🏃 View run Agglomerative (n=4) at: http://localhost:5000/#/experiments/878009883715146363/runs/d4b3be9d23114ab78190a7a7b78a784c
🧪 View experiment at: http://localhost:5000/#/experiments/878009883715146363
