In [1]:
import pandas as pd
import mlflow
import numpy as np
import mlflow.sklearn
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

In [2]:
import os
from pathlib import Path

# Absolute location of the MLflow tracking store for this project
project_path = Path('D:/Proyectos Personales ML/Fraud detection/mlflow')

# Configure MLflow.
# Build the URI with Path.as_uri() rather than f"file://{project_path}":
# str(Path) on Windows yields backslashes and unescaped spaces, and places the
# drive letter where a URI expects the host, producing a malformed file URI.
# as_uri() emits a well-formed, percent-encoded "file:///D:/..." form.
# .absolute() guards as_uri(), which rejects relative paths.
# NOTE(review): a later cell calls mlflow.set_tracking_uri('mlruns'), which
# overrides this setting — confirm which store is the intended one.
mlflow.set_tracking_uri(project_path.absolute().as_uri())

In [3]:
# 1. Point MLflow at a local "mlruns" directory, relative to the current
#    working directory.
# NOTE(review): this replaces any tracking URI configured by an earlier cell —
# confirm which store is the intended one.
mlflow.set_tracking_uri('mlruns')

# Create the experiment on first use, otherwise reuse the existing one.
# The lookup is explicit (instead of "create, and on any exception fetch") so
# that unrelated failures — unreachable store, permission errors, Ctrl-C — are
# no longer swallowed by a bare `except:`. The block also used to be pasted
# twice; one copy is enough.
experiment_name = "fraud_detection_experiment"
existing_experiment = mlflow.get_experiment_by_name(experiment_name)
if existing_experiment is None:
    experiment_id = mlflow.create_experiment(experiment_name)
else:
    experiment_id = existing_experiment.experiment_id

# Make it the active experiment for the runs started below
mlflow.set_experiment(experiment_name)

# 2. Load the processed dataset and separate the features from the target
df = pd.read_csv('D:/Proyectos Personales ML/Fraud detection/data/processed/data_processed.csv')
X = df.drop('Class', axis=1)
y = df['Class']

# Hold out 20% of the rows for evaluation.
# The split is stratified on the target so both partitions keep the same class
# proportions; the evaluation step reads the metrics of class '1', so that
# class must be present in the test partition.
# NOTE(review): assumes every class in 'Class' has at least two rows, which
# stratification requires — confirm against the data.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# 3. Model hyperparameters (keyword names match the estimator's arguments)
best_params = dict(
    n_estimators=200,
    max_depth=20,
    min_samples_split=2,
    max_features='sqrt',
)

# 4. Train a random forest and record the run in MLflow
def train_and_log_model(X_train, X_test, y_train, y_test, params):
    """Train a RandomForestClassifier and log the run to MLflow.

    Inside a single MLflow run named "random_forest_fraud_detection" this
    logs the hyperparameters, fits the classifier on the training data,
    predicts on the test data, logs accuracy plus the precision / recall /
    f1-score reported for class label '1', and stores the fitted model under
    the name "random_forest_model".

    Args:
        X_train: Training features, passed to ``fit``.
        X_test: Evaluation features, passed to ``predict``.
        y_train: Training labels, passed to ``fit``.
        y_test: Evaluation labels, compared against the predictions.
        params: Mapping of keyword arguments for ``RandomForestClassifier``.
            ``random_state`` defaults to 42 when the mapping does not set it.

    Returns:
        The fitted ``RandomForestClassifier``.

    Raises:
        KeyError: If the classification report has no entry for class '1'
            (for example, the labels are not the integers 0/1).
    """
    # Merge the default seed *under* the caller's values. Passing
    # ``random_state=42`` next to ``**params`` raised TypeError whenever
    # ``params`` already carried a seed, and the seed actually used was never
    # logged, so the run could not be reproduced from MLflow alone.
    effective_params = {"random_state": 42, **params}

    with mlflow.start_run(run_name="random_forest_fraud_detection"):
        # Log exactly what the estimator is built with
        mlflow.log_params(effective_params)

        # Train the model
        rf = RandomForestClassifier(**effective_params)
        rf.fit(X_train, y_train)

        # Predict on the held-out data
        y_pred = rf.predict(X_test)

        # Compute the evaluation report; per-class entries are keyed by the
        # string form of the label.
        report = classification_report(y_test, y_pred, output_dict=True)
        positive_key = "1"
        if positive_key not in report:
            raise KeyError(
                f"classification report has no entry for class {positive_key!r}; "
                f"available entries: {sorted(report)}"
            )
        positive_scores = report[positive_key]

        mlflow.log_metric("accuracy", report["accuracy"])
        mlflow.log_metric("precision", positive_scores["precision"])
        mlflow.log_metric("recall", positive_scores["recall"])
        mlflow.log_metric("f1-score", positive_scores["f1-score"])

        # Persist the fitted model with the run
        mlflow.sklearn.log_model(rf, "random_forest_model")

        return rf

# 5. Run the training routine and keep the fitted model it returns
model = train_and_log_model(
    X_train=X_train,
    X_test=X_test,
    y_train=y_train,
    y_test=y_test,
    params=best_params,
)

# Emit both status lines with one call; the newline separator keeps the
# output identical to two consecutive prints.
print(
    "Modelo entrenado y registrado en MLflow",
    "Puedes ver los resultados en la UI de MLflow en http://localhost:5000",
    sep="\n",
)



Modelo entrenado y registrado en MLflow
Puedes ver los resultados en la UI de MLflow en http://localhost:5000
