### Implementing and Logging an ML Experiment with MLflow
**Description**: Train an ML model for an e-commerce recommendation engine, using MLflow to track the model and its experiment runs.

**Steps**:
1. MLflow Integration Setup
2. Training the Model
3. Logging the Experiment
4. Accessing the MLflow UI

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import mlflow
import mlflow.sklearn
import unittest

# --- Core Functionality ---

def generate_fake_data(n=1000, seed=42):
    """Build a synthetic e-commerce interaction dataset.

    Every column, including the target, is drawn independently at random,
    so the features carry no real signal about ``recommended``.

    Args:
        n: Number of rows to generate; must be positive.
        seed: Seed for the private random generator. The default of 42
            reproduces the values produced by seeding NumPy's global
            generator with 42.

    Returns:
        A ``pandas.DataFrame`` with ``n`` rows and the columns ``user_age``,
        ``user_location_score``, ``product_category``,
        ``previous_purchases`` and ``recommended``.

    Raises:
        ValueError: If ``n`` is not positive.
    """
    if n <= 0:
        raise ValueError("Number of records must be positive.")
    # A private RandomState yields the same stream as np.random.seed(seed)
    # followed by module-level draws, but leaves the global generator alone
    # so callers' own random state is not silently reset.
    rng = np.random.RandomState(seed)
    data = {
        "user_age": rng.randint(18, 70, size=n),
        "user_location_score": rng.rand(n),
        "product_category": rng.randint(1, 10, size=n),
        "previous_purchases": rng.randint(0, 20, size=n),
        "recommended": rng.randint(0, 2, size=n),  # Target: 1 = bought, 0 = skipped
    }
    return pd.DataFrame(data)

def prepare_data(df):
    """Split a labelled DataFrame into train and test partitions.

    Args:
        df: DataFrame holding the feature columns plus a ``recommended``
            target column.

    Returns:
        The list produced by ``train_test_split``, unpackable as
        ``X_train, X_test, y_train, y_test``, with 20% of the rows held out
        and a fixed ``random_state`` of 42.

    Raises:
        ValueError: If ``df`` is empty.
        KeyError: If ``df`` has no ``recommended`` column.
    """
    target = "recommended"

    # Guard clauses: fail fast before handing anything to scikit-learn.
    if df.empty:
        raise ValueError("Input DataFrame is empty.")
    if target not in df.columns:
        raise KeyError("Missing target column: 'recommended'")

    features = df.drop(columns=target)
    labels = df[target]
    return train_test_split(features, labels, test_size=0.2, random_state=42)

def train_and_log_model(X_train, X_test, y_train, y_test,
                        n_estimators=100, random_state=42):
    """Train a random forest and record the run in MLflow.

    The run is logged under the ``Ecommerce_Recommendation_Engine``
    experiment with the model type, hyperparameters, test accuracy and the
    fitted model itself (artifact name ``ecommerce_model``).

    Args:
        X_train: Training features.
        X_test: Held-out features used to compute accuracy.
        y_train: Training labels.
        y_test: Held-out labels.
        n_estimators: Number of trees in the forest. The same value is
            passed to the model and logged, so the two cannot drift apart.
        random_state: Seed passed to the classifier; also logged so the run
            can be reproduced.

    Returns:
        Accuracy of the fitted model on the held-out split.

    Raises:
        RuntimeError: If the training or the test split is empty.
    """
    if len(X_train) == 0 or len(y_train) == 0:
        raise RuntimeError("Training data is empty.")
    # Checked up front so an unusable test split fails before an MLflow run
    # is opened, rather than part-way through it.
    if len(X_test) == 0 or len(y_test) == 0:
        raise RuntimeError("Test data is empty.")

    mlflow.set_experiment("Ecommerce_Recommendation_Engine")

    with mlflow.start_run():
        model = RandomForestClassifier(
            n_estimators=n_estimators, random_state=random_state
        )
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        acc = accuracy_score(y_test, y_pred)

        mlflow.log_param("model_type", "RandomForestClassifier")
        mlflow.log_param("n_estimators", n_estimators)
        mlflow.log_param("random_state", random_state)
        mlflow.log_metric("accuracy", acc)
        mlflow.sklearn.log_model(model, "ecommerce_model")

        print(f"✅ Model logged. Accuracy: {acc:.2f}")
        return acc

# --- Main Execution ---
if __name__ == "__main__":
    # End-to-end demo: generate synthetic data, split it, then train the
    # model and log the run to MLflow.
    try:
        df = generate_fake_data()
        X_train, X_test, y_train, y_test = prepare_data(df)
        train_and_log_model(X_train, X_test, y_train, y_test)
        print("👉 Run `mlflow ui` and visit http://localhost:5000 to inspect the logs.")
    # ValueError, KeyError and RuntimeError are the types raised by the
    # validation guards in the helper functions above; each is reported as a
    # one-line message instead of a traceback.
    except ValueError as ve:
        print(f"❌ Value error: {ve}")
    except KeyError as ke:
        print(f"❌ Key error: {ke}")
    except RuntimeError as re:
        print(f"❌ Runtime error: {re}")
    # Catch-all boundary: anything else is also reduced to its message, so
    # no traceback is shown and execution continues past this block.
    except Exception as e:
        print(f"❌ Unexpected error: {e}")

# --- Unit Tests ---
class TestMLFlowPipeline(unittest.TestCase):
    """Tests for the data generation, splitting and training helpers.

    Covers both the happy paths and the validation guards each helper
    raises on bad input.
    """

    def test_data_generation(self):
        df = generate_fake_data(10)
        self.assertEqual(len(df), 10)
        self.assertIn("recommended", df.columns)

    def test_data_generation_rejects_non_positive_size(self):
        for bad_n in (0, -5):
            with self.subTest(n=bad_n):
                with self.assertRaises(ValueError):
                    generate_fake_data(bad_n)

    def test_prepare_data(self):
        df = generate_fake_data(100)
        X_train, X_test, y_train, y_test = prepare_data(df)
        self.assertGreater(len(X_train), 0)
        self.assertEqual(X_train.shape[1], 4)  # 4 features
        # The target must not leak into the feature matrix.
        self.assertNotIn("recommended", X_train.columns)
        # No rows are lost, and features and labels stay aligned.
        self.assertEqual(len(X_train) + len(X_test), 100)
        self.assertEqual(len(X_train), len(y_train))
        self.assertEqual(len(X_test), len(y_test))

    def test_prepare_data_rejects_empty_frame(self):
        with self.assertRaises(ValueError):
            prepare_data(pd.DataFrame())

    def test_prepare_data_requires_target_column(self):
        with self.assertRaises(KeyError):
            prepare_data(pd.DataFrame({"user_age": [25, 40]}))

    def test_model_training(self):
        # NOTE: this exercises the real MLflow calls, so it records a run in
        # whatever tracking store is currently configured.
        df = generate_fake_data(100)
        X_train, X_test, y_train, y_test = prepare_data(df)
        acc = train_and_log_model(X_train, X_test, y_train, y_test)
        self.assertGreaterEqual(acc, 0.0)
        self.assertLessEqual(acc, 1.0)

    def test_training_rejects_empty_training_data(self):
        # The guard fires before any MLflow call is made.
        with self.assertRaises(RuntimeError):
            train_and_log_model([], [], [], [])

if __name__ == "__main__":
    # An explicit argv stops unittest from parsing the host process's own
    # command line (this file is a notebook cell export), and exit=False
    # stops unittest from calling sys.exit() once the tests finish.
    unittest.main(exit=False, argv=["first-arg-is-ignored"])

ModuleNotFoundError: No module named 'mlflow'