# In [None]:
import pandas as pd
import numpy as np
import mlflow
import mlflow.sklearn
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# 1. Load dataset
def load_data(path="ecommerce_data.csv", target="purchase", test_size=0.2, random_state=42):
    """Read a CSV dataset and split it into train and test partitions.

    Called with no arguments this reads ``ecommerce_data.csv`` and uses the
    ``purchase`` column as the label, with a fixed 80/20 split.

    Args:
        path: CSV source handed to ``pd.read_csv``.
        target: Name of the label column; all remaining columns are features.
        test_size: Forwarded to ``train_test_split`` as ``test_size``.
        random_state: Forwarded to ``train_test_split`` so the split is
            reproducible between runs.

    Returns:
        Whatever ``train_test_split`` returns for ``(X, y)``; callers unpack
        it as ``X_train, X_test, y_train, y_test``.

    Raises:
        KeyError: If ``target`` is not a column of the loaded data.
    """
    data = pd.read_csv(path)

    # Fail with a message that names the missing column and the source
    # instead of relying on the less specific error raised by DataFrame.drop.
    if target not in data.columns:
        raise KeyError(f"target column {target!r} not found in {path!r}")

    features = data.drop(columns=[target])
    labels = data[target]
    return train_test_split(
        features, labels, test_size=test_size, random_state=random_state
    )

# 2. Train model with MLflow tracking
def train(n_estimators=100, max_depth=5):
    """Train a random-forest classifier and record the run with MLflow.

    Loads the train/test split via ``load_data()``, fits a
    ``RandomForestClassifier``, scores it on the test partition with
    ``accuracy_score``, and logs the hyperparameters, the accuracy metric and
    the fitted model inside a single MLflow run. The accuracy is also printed.

    Args:
        n_estimators: Passed to ``RandomForestClassifier`` and logged as the
            ``n_estimators`` param.
        max_depth: Passed to ``RandomForestClassifier`` and logged as the
            ``max_depth`` param.

    Returns:
        None.
    """
    X_train, X_test, y_train, y_test = load_data()

    # One dict feeds both the estimator and the MLflow log, so the recorded
    # hyperparameters cannot drift from the ones actually used.
    params = {"n_estimators": n_estimators, "max_depth": max_depth}

    with mlflow.start_run():  # everything logged below belongs to this run
        model = RandomForestClassifier(**params, random_state=42)
        model.fit(X_train, y_train)

        acc = accuracy_score(y_test, model.predict(X_test))

        mlflow.log_params(params)
        mlflow.log_metric("accuracy", acc)
        mlflow.sklearn.log_model(model, "model")

        print(f"Model logged with accuracy: {acc}")

# Run training only when this file is executed directly (script or notebook
# cell), not when it is imported. The dunder names need double underscores:
# a single-underscore `_name_` is an undefined name and raises NameError.
if __name__ == "__main__":
    train()