In [1]:
import pandas as pd
import numpy as np
import duckdb

from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

from sklearn.decomposition import PCA

import matplotlib.pyplot as plt

import mlflow
from mlflow.models import infer_signature
from mlflow.tracking import MlflowClient

In [None]:
# Caminho do banco
db_path = "../../data/duckdb/database.duckdb"

# Conex√£o com o banco DuckDB
con = duckdb.connect(db_path)

# Define raiz do projeto (ajuste conforme seu ambiente)
mlflow.set_tracking_uri(uri="http://127.0.0.1:8080")

# Carrega os dados de feature engineering
input_df = con.execute("SELECT * FROM feature.clusterizacao_cliente").df()

input_df = input_df.drop('client_id', axis=1).to_numpy()

# Treinamento de Modelo

In [3]:
# Define experimento
mlflow.set_experiment("clusterizacao_cliente")

model_name = "clusterizacao_cliente"

with mlflow.start_run() as run:

    # Treinamento do modelo KMeans
    kmeans = KMeans(n_clusters=5, random_state=42)
    kmeans.fit(input_df)
    labels = kmeans.labels_
    silhouette = silhouette_score(input_df, labels)

    # Log no MLflow
    mlflow.log_param('n_clusters', 5)
    mlflow.log_metric("silhouette_score", silhouette)

    # Inferir a assinatura do modelo
    signature = infer_signature(input_df, labels)

    # Log do modelo com registro direto
    model_info = mlflow.sklearn.log_model(
        sk_model=kmeans,
        artifact_path="model",
        signature=signature,
        input_example=input_df[:5],  # usar amostra pequena
        registered_model_name=model_name
    )

    
client = MlflowClient()

latest_versions = client.get_latest_versions(name=model_name, stages=["None"])
mv = latest_versions[0] if latest_versions else None
new_version = mv.version

# Tenta recuperar Champion atual
try:
    champion_mv = client.get_model_version_by_alias(model_name, 'champion')
    champion_version = champion_mv.version
    # Recupera m√©trica do champion pela run
    champion_metrics = client.get_run(champion_mv.run_id).data.metrics
    champion_score = champion_metrics.get("silhouette_score", -1)
    print(f"Champion atual: vers√£o {champion_version}, silhouette_score={champion_score:.4f}")
except mlflow.exceptions.MlflowException:
    champion_version = None
    champion_score = -1

# Recupera m√©trica do novo modelo pela run
new_metrics = client.get_run(mv.run_id).data.metrics
new_score = new_metrics.get("silhouette_score", -1)

# Compara√ß√£o e decis√£o
if champion_score == -1:
    # N√£o h√° champion ainda
    client.set_registered_model_alias(model_name, "champion", new_version)
    print(f"Modelo vers√£o {new_version} definido como Champion (primeiro modelo).")
elif new_score > champion_score:
    # Novo modelo √© melhor
    client.set_registered_model_alias(model_name, "champion", new_version)
    print(f"Novo modelo vers√£o {new_version} promovido como Champion (score {new_score:.4f} > {champion_score:.4f}).")
else:
    print(f"Novo modelo vers√£o {new_version} descartado (score {new_score:.4f} <= {champion_score:.4f}).")


# Champion: modelo atualmente em produ√ß√£o (Production no MLflow).

# Challenger: novo modelo treinado que pretende substituir o Champion, mas precisa provar ser melhor (por meio de m√©tricas, testes, etc.).
# Tenta recuperar modelo champion atual




Registered model 'clusterizacao_cliente' already exists. Creating a new version of this model...
2025/07/29 20:53:43 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: clusterizacao_cliente, version 2


Created version '2' of model 'clusterizacao_cliente'.


  latest_versions = client.get_latest_versions(name=model_name, stages=["None"])


üèÉ View run dazzling-mouse-662 at: http://127.0.0.1:8080/#/experiments/195918764261944467/runs/fe92141d4406438993b2cba7aa246d88
üß™ View experiment at: http://127.0.0.1:8080/#/experiments/195918764261944467


Champion atual: vers√£o 1, silhouette_score=0.5305
Novo modelo vers√£o 2 descartado (score 0.5305 <= 0.5305).
