In [1]:
import mlflow

from sklearn.datasets import load_iris
from sklearn.cluster import DBSCAN, KMeans
from sklearn.metrics import (
    silhouette_score,
    davies_bouldin_score,
)

In [2]:
# Se hace la lectura del dataset de prueba

db = load_iris()
features = db.data
target = db.target

In [3]:
# Opción básica
mlflow.autolog()

# Clustering con DBSCAN.
dbscan = DBSCAN(eps=0.5, min_samples=5)

dbscan.fit_predict(features)

2024/04/16 18:59:21 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.
2024/04/16 18:59:22 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '5b5ca93235644c1697eeef74d94de5b0', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow


array([ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0, -1,  0,  0,  0,  0,  0,  0,  0,  0,  1,
        1,  1,  1,  1,  1,  1, -1,  1,  1, -1,  1,  1,  1,  1,  1,  1,  1,
       -1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1, -1,  1,  1,  1,  1,  1, -1,  1,  1,  1,  1, -1,  1,  1,  1,
        1,  1,  1, -1, -1,  1, -1, -1,  1,  1,  1,  1,  1,  1,  1, -1, -1,
        1,  1,  1, -1,  1,  1,  1,  1,  1,  1,  1,  1, -1,  1,  1, -1, -1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1],
      dtype=int64)

In [5]:
# Clustering con Kmeans.
kmeans = KMeans(n_clusters=3, random_state=42)

kmeans.fit_predict(features)

davies_bouldin_score(features, kmeans.fit_predict(features))

2024/04/16 19:00:44 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '4d54d29d5d874cf4b8624e16bb2a9d5d', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow


2024/04/16 19:00:47 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '9b7801c38b384e87973c85ca0d75763d', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow


0.6660385791628493

In [6]:
# Se puede hacer una experimentación nominal

exp_name = 'Clustering-Ejemplos'
exp_id = mlflow.create_experiment(name=exp_name)

with mlflow.start_run(experiment_id=exp_id, run_name="Kmeans - K=2"):
    modelo_clusters = KMeans(n_clusters=2)
    trained_model = modelo_clusters.fit(features)
    cluster_labels = trained_model.labels_
    score = silhouette_score(features, cluster_labels)
    #save parameter
    mlflow.log_param('value_of_k', 2)
    #save metric
    mlflow.log_metric('silhoutte_score', score)
    #save model
    mlflow.sklearn.log_model(trained_model, "Clustering_Model")
    #end current run
    mlflow.end_run()

MlflowException: Experiment 'Clustering-Ejemplos' already exists.

In [6]:
with mlflow.start_run(experiment_id=exp_id, run_name="Kmeans - K=3"):
    modelo_clusters = KMeans(n_clusters=3)
    trained_model = modelo_clusters.fit(features)
    cluster_labels = trained_model.labels_
    score = silhouette_score(features, cluster_labels)
    score_2 = davies_bouldin_score(features, cluster_labels)
    #save parameter
    mlflow.log_param('value_of_k', 3)
    #save metric
    mlflow.log_metric('silhoutte_score', score)
    mlflow.log_metric('davies_bouldin_score', score_2)
    #save model
    mlflow.sklearn.log_model(trained_model, "Clustering_Model")
    #end current run
    mlflow.end_run()

  super()._check_params_vs_input(X, default_n_init=10)


In [7]:
with mlflow.start_run(experiment_id=exp_id, run_name="DBSCAN"):
    modelo_clusters = DBSCAN(eps=0.5, min_samples=5)
    trained_model = modelo_clusters.fit(features)
    cluster_labels = trained_model.labels_
    score=silhouette_score(features, cluster_labels)
    #save parameter
    mlflow.log_param('min_samples', 5)
    mlflow.log_param('eps', 0.5)
    #save metric
    mlflow.log_metric('silhoutte_score', score)
    #save model
    mlflow.sklearn.log_model(trained_model, "Clustering_Model")
    #end current run
    mlflow.end_run()



In [None]:
!mlflow ui