In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise.accuracy import rmse
import mlflow
import mlflow.sklearn
import os

# Point the MLflow client at a tracking server instead of MLflow's default
# local store; an MLflow server must already be running at this address.
tracking_uri = "http://127.0.0.1:5000"
mlflow.set_tracking_uri(tracking_uri)

# Select (or create) the experiment that the runs below are recorded under
experiment_name = "Ecommerce_Recommendation_Engine"
mlflow.set_experiment(experiment_name)

# Load the sample e-commerce dataset
# For demonstration, we'll create a synthetic dataset
# In practice, replace this with loading your actual dataset
#
# The generator is seeded so that every execution produces the same ratings.
# With an unseeded generator the data (and therefore the RMSE logged to
# MLflow) would differ on every execution, even though the train/test split
# itself uses a fixed random_state.
rng = np.random.default_rng(42)
n_ratings = 1000
data_dict = {
    # Upper bounds are exclusive, matching the original np.random.randint calls
    'user_id': rng.integers(1, 100, n_ratings),  # user ids in 1..99
    'item_id': rng.integers(1, 500, n_ratings),  # item ids in 1..499
    'rating': rng.integers(1, 6, n_ratings),     # ratings in 1..5
}
df = pd.DataFrame(data_dict)

# Wrap the ratings DataFrame in a Surprise Dataset. The Reader carries the
# rating scale (1 to 5); the columns are passed in user, item, rating order.
rating_columns = ['user_id', 'item_id', 'rating']
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(df[rating_columns], reader)

# Hold out 20% of the ratings as the test set, using a fixed random_state
trainset, testset = train_test_split(
    data,
    test_size=0.2,
    random_state=42,
)

# Hyperparameters for the SVD model; the same mapping is unpacked into the
# SVD constructor and logged to MLflow as the run's parameters.
model_params = dict(
    n_factors=100,
    n_epochs=20,
    lr_all=0.005,
    reg_all=0.02,
)

# Imports needed only for persisting the model artifact. They are resolved
# before the run starts so a missing dependency fails before a run is opened.
import tempfile

import joblib

# Start an MLflow run: train the SVD model, evaluate it on the held-out set,
# and record parameters, the RMSE metric and the pickled model.
with mlflow.start_run(run_name="SVD_Collaborative_Filtering") as run:
    # Initialize and train the SVD model
    model = SVD(**model_params)
    model.fit(trainset)

    # Make predictions on the test set
    predictions = model.test(testset)

    # Calculate evaluation metric (RMSE)
    test_rmse = rmse(predictions, verbose=False)

    # Log parameters and metrics to MLflow
    mlflow.log_params(model_params)
    mlflow.log_metric("test_rmse", test_rmse)

    # Log the model
    # Note: Surprise models are not directly supported by mlflow.sklearn
    # We'll save the model manually and log it as an artifact.
    # The pickle is written into a temporary directory so that:
    #   - it is removed even if dumping or uploading raises, and
    #   - an existing "svd_model.pkl" in the working directory is never
    #     overwritten or deleted.
    # The file name is unchanged, so the artifact is still "svd_model.pkl".
    with tempfile.TemporaryDirectory() as artifact_dir:
        model_path = os.path.join(artifact_dir, "svd_model.pkl")
        joblib.dump(model, model_path)
        mlflow.log_artifact(model_path)

    print(f"Run completed with ID: {run.info.run_id}")
    print(f"Logged metrics:\n  - Test RMSE: {test_rmse:.4f}")
    print(f"Model and metrics are logged under the experiment: '{experiment_name}'")