# Experiment Tracking com MLflow

Este notebook demonstra como usar o MLflow para tracking de experimentos com nosso modelo mockado de recomendação.

In [1]:
import os
import sys
import pandas as pd
import mlflow

# Make the project root (two levels above the current working directory)
# importable, so that the `src.*` imports that follow can be resolved.
project_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir))
if sys.path.count(project_dir) == 0:
    sys.path.append(project_dir)

from src.config import configure_mlflow, get_config, CONFIG
from src.recomendation_model.mocked_model import MockedRecommender, MLflowWrapper
from src.features.schemas import get_model_signature, create_mock_input_example
from src.predict.predict import predict_for_user_id
from src.evaluation.utils import evaluate_model
from src.train.utils import load_train_data
from src.data.data_loader import get_evaluation_data

2025-02-27 19:03:17,575 - INFO - Ambiente: dev


In [7]:
# Debug aid: display the module search path to confirm the project root was added to it.
sys.path

['/opt/homebrew/Caskroom/miniforge/base/envs/datathon/lib/python39.zip',
 '/opt/homebrew/Caskroom/miniforge/base/envs/datathon/lib/python3.9',
 '/opt/homebrew/Caskroom/miniforge/base/envs/datathon/lib/python3.9/lib-dynload',
 '',
 '/opt/homebrew/Caskroom/miniforge/base/envs/datathon/lib/python3.9/site-packages',
 '/Users/mauricioaraujo/Eng_ML/FIAP/Fase_5-MLOps/ML_Engineer_Datathon/src',
 '/Users/mauricioaraujo/Eng_ML/FIAP/Fase_5-MLOps/ML_Engineer_Datathon']

In [2]:
# Display the configuration dictionary imported from src.config.
CONFIG

{'MLFLOW_TRACKING_URI': 'http://localhost:5001',
 'MLFLOW_REGISTRY_URI': 'http://localhost:5001',
 'MLFLOW_DEFAULT_ARTIFACT_ROOT': './mlruns',
 'EXPERIMENT': 'news-recommendation-dev',
 'MODEL_NAME': 'news-recommender-dev',
 'API_HOST': '0.0.0.0',
 'API_PORT': 8000,
 'MODEL_STAGE': 'Development',
 'COLD_START_THRESHOLD': 5,
 'SAMPLE_RATE': 0.1,
 'NEWS_TEMP_PATH': 'data/challenge-webmedia-e-globo-2023/itens/itens/itens-parte{}.csv',
 'NEWS_N_CSV_FILES': 3,
 'USERS_TEMP_PATH': 'data/challenge-webmedia-e-globo-2023/files/treino/treino_parte{}.csv',
 'USERS_N_CSV_FILES': 6,
 'MODEL_PARAMS': {'threshold': 0.5, 'top_k': 5}}

## 1. Configuração do MLflow

In [9]:
# Display the working directory; project_dir in the first cell is derived from it,
# so the notebook must be run from a folder two levels below the project root.
os.getcwd()

'/Users/mauricioaraujo/Eng_ML/FIAP/Fase_5-MLOps/ML_Engineer_Datathon/notebooks/mauricio'

In [3]:
# Configure MLflow through the project helper.
# NOTE(review): presumably applies the tracking/registry URIs and experiment
# listed in CONFIG — confirm in src.config.
configure_mlflow()

In [4]:
# Hyperparameters passed to MockedRecommender and logged to MLflow.
# These values mirror CONFIG['MODEL_PARAMS'] as displayed earlier in the notebook.
model_params = dict(threshold=0.5, top_k=5)

## 2. Treinamento e Tracking

In [6]:
# Load the training data, the evaluation data and a sample model input.
X_train, y_train = load_train_data()
evaluation_data = get_evaluation_data()

input_example = create_mock_input_example()

# Read the artifact root under the key that CONFIG actually defines
# ("MLFLOW_DEFAULT_ARTIFACT_ROOT"). The previous key, "MLFLOW_ARTIFACT_ROOT",
# does not appear in CONFIG, so the lookup presumably always returned the default.
artifact_root = get_config("MLFLOW_DEFAULT_ARTIFACT_ROOT", "./mlruns")
# NOTE(review): MLFLOW_ARTIFACTS_DESTINATION is an `mlflow server` option; setting it
# in the client process is unlikely to change where this notebook writes artifacts.
# An "OSError: Read-only file system: '/mlruns'" points at the experiment's artifact
# location being a server-local path — fix that on the tracking server side.
os.environ["MLFLOW_ARTIFACTS_DESTINATION"] = artifact_root


# Start a new run in the active experiment.
with mlflow.start_run() as run:
    # Capture the run id first so it stays available for inspection even if a
    # later logging step raises.
    run_id = run.info.run_id

    # Build and train the model.
    model = MockedRecommender(**model_params)
    model.train(X_train, y_train)

    # Log the hyperparameters.
    mlflow.log_params(model_params)

    # Evaluate the model and log the resulting metrics.
    metrics = evaluate_model(model, evaluation_data)
    mlflow.log_metrics(metrics)

    # Log the model through the pyfunc wrapper, with its signature and an input example.
    wrapper = MLflowWrapper(model)
    mlflow.pyfunc.log_model(
        artifact_path=get_config('MODEL_NAME'),
        python_model=wrapper,
        signature=get_model_signature(),
        input_example=input_example
    )


2025/02/27 19:03:50 INFO mlflow.pyfunc: Validating input example against model signature


🏃 View run rebellious-shrew-608 at: http://localhost:5001/#/experiments/2/runs/5623e505d8be42a2af8a14722f98998a
🧪 View experiment at: http://localhost:5001/#/experiments/2


OSError: [Errno 30] Read-only file system: '/mlruns'

## 3. Carregamento e Predição

In [6]:
# Reload the model that was logged under this run, addressed by its runs:/ URI.
model_uri = f"runs:/{run_id}/{get_config('MODEL_NAME')}"
loaded_model = mlflow.pyfunc.load_model(model_uri)

# Predict with the reloaded pyfunc model.
predictions = loaded_model.predict(input_example)
print(f"Predictions: {predictions}")

# Predict with the in-memory model as well, so both outputs can be compared.
original_predictions = model.predict(input_example)
print(f"Original predictions: {original_predictions}")

Predictions: [0.5]
Original predictions: [0.5]
