In [9]:
import mlflow

from sklearn.datasets import load_iris
from sklearn.cluster import DBSCAN, KMeans
from sklearn.metrics import (
    silhouette_score,
    davies_bouldin_score,
)

In [10]:
# Se hace la lectura del dataset de prueba

db = load_iris()
features = db.data
target = db.target

In [11]:
# Opción básica
mlflow.autolog()

# Clustering con DBSCAN.
dbscan = DBSCAN(eps=0.5, min_samples=5)

dbscan.fit_predict(features)

2024/10/08 00:05:06 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.
2024/10/08 00:05:06 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID 'd12fc6cebe80462184a582f80bc2f446', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow


array([ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0, -1,  0,  0,  0,  0,  0,  0,  0,  0,  1,
        1,  1,  1,  1,  1,  1, -1,  1,  1, -1,  1,  1,  1,  1,  1,  1,  1,
       -1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1, -1,  1,  1,  1,  1,  1, -1,  1,  1,  1,  1, -1,  1,  1,  1,
        1,  1,  1, -1, -1,  1, -1, -1,  1,  1,  1,  1,  1,  1,  1, -1, -1,
        1,  1,  1, -1,  1,  1,  1,  1,  1,  1,  1,  1, -1,  1,  1, -1, -1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1])

In [12]:
# Clustering con Kmeans.
kmeans = KMeans(n_clusters=3, random_state=42)

kmeans.fit_predict(features)

# davies_bouldin_score(features, kmeans.fit_predict(features))

2024/10/08 00:05:09 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID 'df2567060f7c4de38a13cdcc9ca7b374', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow


array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 0, 0, 0, 0, 2, 0, 0, 0,
       0, 0, 0, 2, 2, 0, 0, 0, 0, 2, 0, 2, 0, 2, 0, 0, 2, 2, 0, 0, 0, 0,
       0, 2, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 2], dtype=int32)

In [13]:
# Se puede hacer una experimentación nominal

exp_name = 'Clustering-Ejemplos'
exp_id = mlflow.create_experiment(name=exp_name)

with mlflow.start_run(experiment_id=exp_id, run_name="Kmeans - K=2"):
    modelo_clusters = KMeans(n_clusters=2)
    trained_model = modelo_clusters.fit(features)
    cluster_labels = trained_model.labels_
    score = silhouette_score(features, cluster_labels)
    #save parameter
    mlflow.log_param('value_of_k', 2)
    #save metric
    mlflow.log_metric('silhoutte_score', score)
    #save model
    mlflow.sklearn.log_model(trained_model, "Clustering_Model")
    #end current run
    mlflow.end_run()

MlflowException: Experiment 'Clustering-Ejemplos' already exists.

In [6]:
with mlflow.start_run(experiment_id=exp_id, run_name="Kmeans - K=3"):
    modelo_clusters = KMeans(n_clusters=3)
    trained_model = modelo_clusters.fit(features)
    cluster_labels = trained_model.labels_
    score = silhouette_score(features, cluster_labels)
    score_2 = davies_bouldin_score(features, cluster_labels)
    #save parameter
    mlflow.log_param('value_of_k', 3)
    #save metric
    mlflow.log_metric('silhoutte_score', score)
    mlflow.log_metric('davies_bouldin_score', score_2)
    #save model
    mlflow.sklearn.log_model(trained_model, "Clustering_Model")
    #end current run
    mlflow.end_run()



In [7]:
with mlflow.start_run(experiment_id=exp_id, run_name="DBSCAN"):
    modelo_clusters = DBSCAN(eps=0.5, min_samples=5)
    trained_model = modelo_clusters.fit(features)
    cluster_labels = trained_model.labels_
    score=silhouette_score(features, cluster_labels)
    #save parameter
    mlflow.log_param('min_samples', 5)
    mlflow.log_param('eps', 0.5)
    #save metric
    mlflow.log_metric('silhoutte_score', score)
    #save model
    mlflow.sklearn.log_model(trained_model, "Clustering_Model")
    #end current run
    mlflow.end_run()



In [14]:
!mlflow ui

Traceback (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "c:\Users\samy_\Desktop\13MBID_SMDB\13MBID-ABR24-25-SMDLCB\.venv\Lib\site-packages\waitress\__main__.py", line 3, in <module>
    run()  # pragma nocover
    ^^^^^
  File "c:\Users\samy_\Desktop\13MBID_SMDB\13MBID-ABR24-25-SMDLCB\.venv\Lib\site-packages\waitress\runner.py", line 298, in run
    _serve(app, **kw)
  File "c:\Users\samy_\Desktop\13MBID_SMDB\13MBID-ABR24-25-SMDLCB\.venv\Lib\site-packages\waitress\__init__.py", line 13, in serve
    server = _server(app, **kw)
             ^^^^^^^^^^^^^^^^^^
  File "c:\Users\samy_\Desktop\13MBID_SMDB\13MBID-ABR24-25-SMDLCB\.venv\Lib\site-packages\waitress\server.py", line 78, in create_server
    last_serv = TcpWSGIServer(
                ^^^^^^^^^^^^^^
  File "c:\Users\samy_\Desktop\13MBID_SMDB\13MBID-ABR24-25-SMDLCB\.venv\Lib\site-packages\waitress\server.py", line 243, in __init__
    self.bin