In a real-world production environment, data arrives continuously from multiple sources — not just from a single static CSV file.

This section defines a reusable pipeline function that automates the process of loading new session data, preprocessing it using the saved
pipeline, generating predictions with the trained model, and saving the results.

The function below simulates what a real e-commerce prediction service would do daily:
    
- Detect and load new data

- Apply preprocessing transformations

- Generate model predictions

- Save results to a timestamped CSV file (a production service might write to a database instead)

In [1]:
# Import Libraries
import pandas as pd
import joblib
import os
from datetime import datetime
from sklearn.pipeline import Pipeline

In [2]:
# Define Automation Pipeline Function
def run_cart_abandonment_pipeline(
    model_path: str = "best_svm.pkl",
    preprocessor_path: str = "preprocessor.pkl",
    new_data_path: str = "new_sessions.csv",
    output_dir: str = "predictions_output",
) -> str:
    """
    Automates the prediction workflow for new session data.

    Loads the saved preprocessor and model, reads the new session CSV,
    transforms it, generates class predictions and probabilities, and writes
    the original rows plus two prediction columns to a timestamped CSV file.

    Parameters
    ----------
    model_path : str
        Path to the saved trained model (.pkl file).
    preprocessor_path : str
        Path to the saved preprocessing pipeline (.pkl file).
    new_data_path : str
        Path to the new session CSV file (simulated daily data).
    output_dir : str
        Directory to save prediction output files. Created if missing.

    Returns
    -------
    str
        File path of the saved predictions.

    Raises
    ------
    FileNotFoundError
        If the model file, the preprocessor file, or the new-data file does
        not exist. The message names the missing path(s).
    """

    print("\nStarting Cart Abandonment Automation Pipeline")

    # Validate every input path before loading anything, so a run with a
    # missing input fails immediately instead of after unpickling artifacts.
    missing_artifacts = [
        path for path in (model_path, preprocessor_path)
        if not os.path.exists(path)
    ]
    if missing_artifacts:
        missing_list = ", ".join(f"'{path}'" for path in missing_artifacts)
        raise FileNotFoundError(
            "Model or preprocessor file not found. Check file paths. "
            f"Missing: {missing_list}"
        )

    if not os.path.exists(new_data_path):
        raise FileNotFoundError(f"New data file not found at: {new_data_path}")

    # Load preprocessor and model
    # NOTE(security): joblib.load is pickle-based and can execute arbitrary
    # code while loading; only point these paths at trusted artifacts.
    model = joblib.load(model_path)
    preprocessor = joblib.load(preprocessor_path)
    print("Loaded model and preprocessing pipeline successfully")

    # Load new session data
    new_df = pd.read_csv(new_data_path)
    print(f"Loaded {len(new_df)} new session records from '{new_data_path}'")

    # Preprocess the data
    X_new_prepared = preprocessor.transform(new_df)

    # Get predictions and probabilities
    y_pred = model.predict(X_new_prepared)
    # Column 1 is used as the abandonment probability. This assumes a binary
    # classifier whose second class means "abandoned" -- TODO confirm.
    # NOTE(review): the default file name suggests an SVM; if it is a
    # scikit-learn SVC it must have been fitted with probability=True for
    # predict_proba to be available -- verify against the training code.
    y_proba = model.predict_proba(X_new_prepared)[:, 1]

    # Combine with original data (copy so the loaded frame is not mutated)
    predictions_df = new_df.copy()
    predictions_df["predicted_class"] = y_pred
    predictions_df["abandonment_risk"] = y_proba

    # Save predictions
    os.makedirs(output_dir, exist_ok=True)
    # Second-resolution local timestamp keeps each run's output separate.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_path = os.path.join(output_dir, f"predictions_{timestamp}.csv")
    predictions_df.to_csv(output_path, index=False)
    print(f"Predictions saved to: {output_path}")

    print("Automation pipeline completed successfully.\n")
    return output_path

The function can be run manually, as in the cell below, or triggered by a scheduler (e.g., a daily cron job) later.

In [None]:
# Run the Pipeline
# Calls the pipeline with its default arguments (model "best_svm.pkl",
# preprocessor "preprocessor.pkl", input "new_sessions.csv", output directory
# "predictions_output"). The call returns the path of the saved predictions
# file and raises FileNotFoundError if any of the input files is missing.
run_cart_abandonment_pipeline()