In [1]:
# --- Importaciones (Añadimos mlflow) ---
import pandas as pd
from surprise import Reader, Dataset, KNNBasic
from surprise.model_selection import train_test_split
from surprise import accuracy
import os
import mlflow
import mlflow.sklearn # Importante para el logging del modelo

print("Librerías listas, incluyendo MLflow.")

# --- Data loading ---
# Locate the project root. When this code runs as a script, the root is taken
# to be two directory levels above the script file. `__file__` is not defined
# in interactive sessions (e.g. a notebook), so in that case fall back to the
# parent of the current working directory.
try:
    _script_path = os.path.abspath(__file__)
except NameError:
    BASE_DIR = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
else:
    BASE_DIR = os.path.dirname(os.path.dirname(_script_path))

# The ratings file is read from <BASE_DIR>/data/ml-25m/ratings.csv.
DATA_DIR = os.path.join(BASE_DIR, 'data', 'ml-25m')
RATINGS_PATH = os.path.join(DATA_DIR, 'ratings.csv')

# Load the whole CSV into memory; the columns 'userId', 'movieId' and 'rating'
# are the ones used further down.
ratings_df = pd.read_csv(RATINGS_PATH)

# --- Sampling ---
# Keep only the users and the movies with the most ratings, so the item-based
# KNN model is trained on a reduced subset of the data.
n_users = 40000
n_movies = 20000

_ratings_per_user = ratings_df['userId'].value_counts()
_ratings_per_movie = ratings_df['movieId'].value_counts()
user_ids = _ratings_per_user.nlargest(n_users).index
movie_ids = _ratings_per_movie.nlargest(n_movies).index

# A row is kept only if BOTH its user and its movie are in the selected sets.
_user_selected = ratings_df['userId'].isin(user_ids)
_movie_selected = ratings_df['movieId'].isin(movie_ids)
sampled_df = ratings_df[_user_selected & _movie_selected]

# --- Surprise dataset ---
# Surprise expects the columns in (user, item, rating) order; the reader is
# configured with a rating scale of 0.5 to 5.0.
reader = Reader(rating_scale=(0.5, 5.0))
_rating_triples = sampled_df[['userId', 'movieId', 'rating']]
data = Dataset.load_from_df(_rating_triples, reader)

# Hold out 20% of the ratings for evaluation; the fixed seed keeps the split
# reproducible between runs.
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

# MLflow step 1: select the experiment under which the run is recorded.
mlflow.set_experiment("LatentLens-KNN-Evaluation")

# MLflow step 2: open a run. Everything logged inside this `with` block is
# attached to it.
with mlflow.start_run() as run:
    print(f"MLflow Run ID: {run.info.run_id}")

    # --- Experiment parameters ---
    k_neighbors = 40
    similarity_metric = 'cosine'

    # MLflow step 3: log the parameters that define this experiment, one
    # `log_param` call per entry, in the order listed here.
    _run_params = {
        "model_type": "KNNBasic",
        "k_neighbors": k_neighbors,
        "similarity_metric": similarity_metric,
        "user_sample_size": n_users,
        "movie_sample_size": n_movies,
    }
    for _param_name, _param_value in _run_params.items():
        mlflow.log_param(_param_name, _param_value)

    # --- Training ---
    # user_based=False: similarities are computed between items (movies),
    # not between users.
    sim_options = dict(name=similarity_metric, user_based=False)
    model = KNNBasic(k=k_neighbors, sim_options=sim_options)
    model.fit(trainset)

    # --- Evaluation ---
    # Predict every held-out rating and compute the RMSE over them.
    predictions = model.test(testset)
    rmse = accuracy.rmse(predictions)

    # MLflow step 4: log the resulting metric.
    mlflow.log_metric("rmse", rmse)

    # MLflow step 5: log the fitted model as a run artifact.
    # NOTE(review): KNNBasic comes from Surprise, not scikit-learn, yet it is
    # logged through the sklearn flavor — confirm the saved model can be
    # reloaded and used as expected.
    mlflow.sklearn.log_model(model, "surprise_knn_model")

print("\n¡Ejecución de MLflow finalizada! Revisa la UI en http://127.0.0.1:5000")

ModuleNotFoundError: No module named 'surprise'