In [1]:
# Cargar librerías
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score
import mlflow
import mlflow.sklearn


In [2]:
try:
    if mlflow.active_run():
        mlflow.end_run()
except Exception as e:
    print(" No se pudo cerrar el run activo (probablemente ya está cerrado):", e)

In [3]:
# Leer datos
df = pd.read_csv("data/diabetes.csv")



In [4]:
# identifica las columnas que tiene valores 0
(df==0).sum()

Pregnancies                 111
Glucose                       5
BloodPressure                35
SkinThickness               227
Insulin                     374
BMI                          11
DiabetesPedigreeFunction      0
Age                           0
Outcome                     500
dtype: int64

In [5]:
# Reemplazar ceros en columnas específicas por NaN
cols_to_clean = ['Glucose', 'BloodPressure', 'SkinThickness', 'Insulin']
# No se consideran algunas, por ejemplo Pregnancies (por qué si es posible que se tengan cero embarazos)

# Reemplazar ceros por NaN para poder tratarlos como datos faltantes
for col in cols_to_clean:
    df[col] = df[col].replace(0, np.nan)

# Reemplazar NaN con la moda (valor más frecuente) de cada columna
for col in cols_to_clean:
    moda = df[col].mode()[0]
    df[col] = df[col].fillna(moda)


In [6]:
# Separar datos
X = df.drop("Outcome", axis=1)
y = df["Outcome"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [7]:
# agregar el traking uri de mlflow
mlflow.set_tracking_uri("http://localhost:9090")
# agregar set_experiment con nombre: ClasificadorDemoDiabetes
mlflow.set_experiment("Clasificador_Demo_Diabetes")

2025/05/23 09:46:07 INFO mlflow.tracking.fluent: Experiment with name 'Clasificador_Demo_Diabetes' does not exist. Creating a new experiment.


<Experiment: artifact_location='mlflow-artifacts:/1', creation_time=1748011567809, experiment_id='1', last_update_time=1748011567809, lifecycle_stage='active', name='Clasificador_Demo_Diabetes', tags={}>

In [8]:
# Entrenamiento y registro con MLflow
C = 1.0
max_iter = 1000
# usar los parámetros del archivo logreg_variaciones_educativas.csv
# para los parámtros C, max_iter, solver y penalty de LogisticRegresion
# se debe genear un run por cada fila del archivo, usando sus parámetros
# Analizar que usar



# Leer archivo de parámetros
param_df = pd.read_csv("data/logreg_variaciones_educativas.csv")


for i, row in param_df.iterrows():
    print(row["logreg_C"])
    run_name = f"Clasificador_Diabetes_{row['run_id']}"
  

    # ejecutar el experimento por fila
    with mlflow.start_run(run_name=run_name):

        # construir el pipeline
        pipeline = Pipeline([
            ("scaler", StandardScaler()),
            ("clf", LogisticRegression(
                C=row["logreg_C"],
                max_iter=int(row["logreg_max_iter"]),
                solver=row["solver"],
                penalty=row["penalty"]
            ))
        ])



    
        # Entrenar y evaluar
        pipeline.fit(X_train, y_train)
        y_pred = pipeline.predict(X_test)
    
        acc = accuracy_score(y_test, y_pred)
        prec = precision_score(y_test, y_pred)
    
        # recuerde que log_param y log_metric debe ir guardando los valores
        # dependiendo del run correspondiente de cada fila
        mlflow.log_param("logreg_C", C)
        mlflow.log_param("logreg_max_iter", max_iter)
        mlflow.log_param("solver", "liblinear")
        mlflow.log_param("penalty", "l2")
        
        mlflow.log_metric("accuracy", acc)
        mlflow.log_metric("precision", prec)
        
        # Guardar el pipeline completo
        #mlflow.sklearn.log_model(pipeline, "modelo_pipeline")
        #mlflow.sklearn.log_model(pipeline,"modelo_pipeline")
        mlflow.sklearn.log_model(pipeline, artifact_path="modelo_diabetes")
    
        
        print(" Modelo registrado en MLflow")
        print(f"Accuracy: {acc:.4f} | Precision: {prec:.4f}")

0.01




 Modelo registrado en MLflow
Accuracy: 0.7208 | Precision: 0.6034
🏃 View run Clasificador_Diabetes_practica_01 at: http://localhost:9090/#/experiments/1/runs/8d1a8f4695fa471086fd764fa3cef7be
🧪 View experiment at: http://localhost:9090/#/experiments/1
1.12




 Modelo registrado en MLflow
Accuracy: 0.7532 | Precision: 0.6667
🏃 View run Clasificador_Diabetes_practica_02 at: http://localhost:9090/#/experiments/1/runs/a7bef01205fe419ea01e7593d2259ba7
🧪 View experiment at: http://localhost:9090/#/experiments/1
2.23




 Modelo registrado en MLflow
Accuracy: 0.7532 | Precision: 0.6667
🏃 View run Clasificador_Diabetes_practica_03 at: http://localhost:9090/#/experiments/1/runs/e094665c404f405ca875fae9e46d9bed
🧪 View experiment at: http://localhost:9090/#/experiments/1
3.34




 Modelo registrado en MLflow
Accuracy: 0.7532 | Precision: 0.6667
🏃 View run Clasificador_Diabetes_practica_04 at: http://localhost:9090/#/experiments/1/runs/745dd8c9216444e7a7297cd48ad16a34
🧪 View experiment at: http://localhost:9090/#/experiments/1
4.45




 Modelo registrado en MLflow
Accuracy: 0.7532 | Precision: 0.6667
🏃 View run Clasificador_Diabetes_practica_05 at: http://localhost:9090/#/experiments/1/runs/a34e6945c35a480aba423ba6c75be59d
🧪 View experiment at: http://localhost:9090/#/experiments/1
5.5600000000000005




 Modelo registrado en MLflow
Accuracy: 0.7532 | Precision: 0.6667
🏃 View run Clasificador_Diabetes_practica_06 at: http://localhost:9090/#/experiments/1/runs/16a299a3785843a589c776105f7ba4ac
🧪 View experiment at: http://localhost:9090/#/experiments/1
6.67




 Modelo registrado en MLflow
Accuracy: 0.7532 | Precision: 0.6667
🏃 View run Clasificador_Diabetes_practica_07 at: http://localhost:9090/#/experiments/1/runs/37feab56d25245afaa641d187301ad1e
🧪 View experiment at: http://localhost:9090/#/experiments/1
7.78




 Modelo registrado en MLflow
Accuracy: 0.7532 | Precision: 0.6667
🏃 View run Clasificador_Diabetes_practica_08 at: http://localhost:9090/#/experiments/1/runs/6b93e867102d40388353159012497786
🧪 View experiment at: http://localhost:9090/#/experiments/1
8.89




 Modelo registrado en MLflow
Accuracy: 0.7532 | Precision: 0.6667
🏃 View run Clasificador_Diabetes_practica_09 at: http://localhost:9090/#/experiments/1/runs/f10d4064a3d249ec87e92c4c3d4282b3
🧪 View experiment at: http://localhost:9090/#/experiments/1
10.0




 Modelo registrado en MLflow
Accuracy: 0.7532 | Precision: 0.6667
🏃 View run Clasificador_Diabetes_practica_10 at: http://localhost:9090/#/experiments/1/runs/eea94e67646c489197a01ed6e52fe647
🧪 View experiment at: http://localhost:9090/#/experiments/1
