### Implementing and Logging an ML Experiment with MLFlow
**Description**: Train a classification model on simulated e-commerce recommendation data, and use MLFlow to track the experiment by logging the run's parameters, metrics, and trained model.

**Steps**:
1. MLFlow Integration Setup
2. Training the Model
3. Logging the Experiment
4. Accessing MLFlow UI

In [1]:
# Step 1: Import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import mlflow
import mlflow.sklearn

# Step 2: Simulated E-Commerce Recommendation Data
def generate_fake_data(n_samples=1000, seed=42):
    """Build a synthetic e-commerce recommendation dataset.

    Every column is drawn independently at random, so the features carry no
    real signal about the ``recommended`` target; a model trained on this
    data should be expected to score close to chance level.

    Args:
        n_samples: Number of rows to generate. Defaults to 1000.
        seed: Seed for the function's private random generator. Defaults
            to 42, which reproduces the data of the original hard-coded
            version.

    Returns:
        A ``pandas.DataFrame`` with ``n_samples`` rows and the columns
        ``user_age`` (integers 18-69), ``user_location_score`` (floats in
        [0, 1)), ``product_category`` (integers 1-9),
        ``previous_purchases`` (integers 0-19) and ``recommended``
        (0 or 1).
    """
    # A private RandomState produces the same stream as calling
    # np.random.seed(seed) followed by the module-level np.random.*
    # functions, but it does not reseed the process-wide generator that
    # unrelated code may depend on.
    rng = np.random.RandomState(seed)
    data = {
        "user_age": rng.randint(18, 70, size=n_samples),
        "user_location_score": rng.rand(n_samples),
        "product_category": rng.randint(1, 10, size=n_samples),
        "previous_purchases": rng.randint(0, 20, size=n_samples),
        "recommended": rng.randint(0, 2, size=n_samples),  # Target: 1 = bought, 0 = skipped
    }
    return pd.DataFrame(data)

# Step 3: Train/Test Split
def prepare_data(df):
    """Separate features from the target and make an 80/20 train/test split.

    Args:
        df: DataFrame containing a ``recommended`` column, which is used as
            the target; all remaining columns are used as features.

    Returns:
        The result of ``train_test_split``: the training features, test
        features, training target and test target, in that order.
    """
    target_column = "recommended"
    features = df.drop(columns=[target_column])
    labels = df[target_column]
    # A fixed random_state keeps the split identical from run to run.
    return train_test_split(features, labels, test_size=0.2, random_state=42)

# Step 4: Train & Log Model using MLFlow
def train_and_log_model(X_train, X_test, y_train, y_test,
                        n_estimators=100, random_state=42):
    """Train a random forest classifier and record the run in MLFlow.

    The run is stored under the ``Ecommerce_Recommendation_Engine``
    experiment. The hyperparameters, the test-set accuracy and the fitted
    model (under the artifact path ``ecommerce_model``) are logged to it.

    Args:
        X_train: Training features.
        X_test: Test features used to compute the accuracy metric.
        y_train: Training target values.
        y_test: Test target values.
        n_estimators: Number of trees in the forest. Defaults to 100.
        random_state: Seed passed to the classifier. Defaults to 42.

    Returns:
        None. Results are written to MLFlow and a summary is printed.
    """
    mlflow.set_experiment("Ecommerce_Recommendation_Engine")

    with mlflow.start_run():
        model = RandomForestClassifier(
            n_estimators=n_estimators, random_state=random_state
        )
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        acc = accuracy_score(y_test, y_pred)

        # Log parameters from the same variables that built the model, so
        # the recorded values cannot drift from what was actually trained.
        mlflow.log_param("model_type", type(model).__name__)
        mlflow.log_param("n_estimators", n_estimators)
        mlflow.log_param("random_state", random_state)

        # Log metrics
        mlflow.log_metric("accuracy", acc)

        # Log model
        mlflow.sklearn.log_model(model, "ecommerce_model")

        print(f"✅ Model logged to MLFlow with accuracy: {acc:.2f}")
        print("👉 View with: mlflow ui (http://localhost:5000)")

# Step 5: Run Entire Workflow
if __name__ == "__main__":
    # Run the whole workflow: generate the data, split it, then train the
    # model and log the run.
    try:
        df = generate_fake_data()
        X_train, X_test, y_train, y_test = prepare_data(df)
        train_and_log_model(X_train, X_test, y_train, y_test)
    except Exception as e:
        print(f"❌ Pipeline failed: {e}")
        # Re-raise after the friendly message so the traceback is kept and
        # a failed run does not finish looking like a success (a script
        # would otherwise exit with status 0).
        raise

2025/05/22 16:50:09 INFO mlflow.tracking.fluent: Experiment with name 'Ecommerce_Recommendation_Engine' does not exist. Creating a new experiment.


✅ Model logged to MLFlow with accuracy: 0.48
👉 View with: mlflow ui (http://localhost:5000)
