In [None]:
import logging
import mlflow
import mlflow.sklearn
from mlflow import MlflowClient
from mlflow.exceptions import MlflowException
import pandas as pd

from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.model_selection import train_test_split

from src.preprocesamiento.preprocesamiento import crear_preprocesador, validar_y_limpieza
from src.train.train import obtener_modelos, entrenar_y_loggear
from src.config.config import load_catalog

In [None]:
# Notebook-level logger used for the progress messages in the training cell.
# NOTE(review): no handler or level is configured in the visible cells, so the
# INFO messages emitted below may not be displayed — confirm the logging setup.
logger = logging.getLogger(__name__)

In [None]:
# Execution mode of the notebook. The cells below only accept the exact values
# "train" or "retraining"; any other value (including this placeholder) makes
# the data-loading cell raise ValueError. Set it before running the notebook.
widget = "train or retraining"

In [None]:
# MLflow client reused by the registry and prediction cells.
client = MlflowClient()

# Data catalog and parameter mapping for the "base" environment.
catalog, params = load_catalog(env="base")

# Run / experiment / registry identifiers and the modelling settings.
nombre_run = params['nombre_run']
nombre_experimento = params['nombre_experiment']
alias = params['aliases_champion']
params_modeling = params['modeling']
nombre_usecase = params['nombre_usecase']

# Names of the numeric and categorical feature columns.
num_features, cat_features = params['num_features'], params['cat_features']

# Reject unknown modes up front, then load the dataset that matches the mode.
if widget not in ("train", "retraining"):
    raise ValueError("Widget debe ser 'train' o 'retraining'")

df = catalog.load(
    'titanic_intermediate' if widget == "train" else 'titanic_intermediate_retraining'
)

# Candidate model configurations and the shared preprocessing step.
# The candidates are bound to their own name so that the configuration mapping
# `params` is not overwritten and remains usable by later cells.
modelos_candidatos = obtener_modelos(params_modeling)
procesador = crear_preprocesador(num_features, cat_features)

# Feature matrix (numeric + categorical columns) and target column.
X = df[num_features + cat_features]
y = df['Survived']

# Hold out 20% of the rows for evaluation; the seed is fixed for reproducibility.
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Start-up log message per supported mode; any other mode is rejected.
mensajes_inicio = {
    "train": "Iniciando proceso de entrenamiento",
    "retraining": "Iniciando proceso de reentrenamiento",
}
if widget not in mensajes_inicio:
    raise ValueError("Widget debe ser 'train' o 'retraining'")
logger.info(mensajes_inicio[widget])

# Both modes currently train and log every candidate in exactly the same way.
# TODO (retraining): load the champion model, train on the new data, compare
# against the champion and, if the new model is better, log it as the champion.
for param in modelos_candidatos:
    print(f"Entrenando modelo con parámetros: {param}")

    entrenar_y_loggear(
        nombre_run,
        nombre_experimento,
        param,
        procesador,
        x_train,
        y_train,
        x_test,
        y_test,
        tags=None,
        cv=5,
    )

In [None]:

# Make sure the registered model exists before adding versions to it.
try:
    client.get_registered_model(nombre_usecase)
    print("El modelo ya existe")
except MlflowException:
    # NOTE(review): every MlflowException is treated as "model not found";
    # confirm that is acceptable for the registry backend in use.
    client.create_registered_model(nombre_usecase)
    print("Modelo registrado creado.")

# Best runs of the experiment, ranked by mean cross-validation accuracy.
top_2 = mlflow.search_runs(
    experiment_names=[nombre_experimento],
    order_by=["metrics.mean_cv_accuracy DESC"],
    max_results=2
)

# "train" registers the two best runs; "retraining" only needs the best one.
runs_necesarios = 2 if widget == "train" else 1
if len(top_2) < runs_necesarios:
    raise RuntimeError(
        f"Se necesitan al menos {runs_necesarios} runs en el experimento "
        f"'{nombre_experimento}' y se encontraron {len(top_2)}"
    )

# The model artifact is looked up under the parent run of each ranked run.
# NOTE(review): presumably entrenar_y_loggear logs the model on the parent of
# a nested run — confirm, since two ranked runs could share the same parent.
best_run_id = top_2.iloc[0]["tags.mlflow.parentRunId"]

# (run id, alias) pairs to register, in registration order.
if widget == "train":
    second_best_run_id = top_2.iloc[1]["tags.mlflow.parentRunId"]
    registros = [(best_run_id, "champion"), (second_best_run_id, "challenger")]
elif widget == "retraining":
    registros = [(best_run_id, "challenger")]
else:
    registros = []

# Register each selected run as a new model version and point its alias at it.
for run_id, alias_modelo in registros:
    version = mlflow.register_model(f'runs:/{run_id}/model', nombre_usecase)

    client.set_registered_model_alias(
        name=nombre_usecase,
        alias=alias_modelo,
        version=version.version
    )


In [None]:

# Load the model that currently holds the "champion" alias.
model = mlflow.sklearn.load_model(f"models:/{nombre_usecase}@champion")

# The loaded sklearn estimator carries no registry metadata, so the version
# number is resolved from the registry alias instead of from the model object.
version_champion = client.get_model_version_by_alias(nombre_usecase, "champion").version

# Read the configuration directly so this cell does not depend on whatever
# `params` is bound to by earlier cells.
config = load_catalog(env="base")[1]

# Validate / clean the data with the project preprocessing module.
df_to_predict = validar_y_limpieza(df, config['data_validation']['columnas'])

# Feature column names.
num_features = config['num_features']
cat_features = config['cat_features']

# Feature matrix and observed target.
X = df_to_predict[num_features + cat_features]
y = df_to_predict['Survived']

predicciones = model.predict(X)

# Build the output as a new frame instead of adding columns in place to a
# slice of `df_to_predict`.
X = X.assign(prediction=predicciones, Survived=y, version=version_champion)

catalog.save(ds_name='baseline_modelo', data=X)