# Cart Super Add-On (CSAO) Rail Recommendation System

This notebook implements a complete end-to-end ML system for recommending add-ons in a food delivery platform.

In [None]:
%pip install -q pandas numpy scikit-learn xgboost matplotlib seaborn

In [1]:
import sys
import os
sys.path.append('../src')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split

# Custom modules
from features import create_features
from models import train_and_evaluate
from cold_start import handle_cold_start
from business_impact import simulate_business_impact
from ab_testing import simulate_ab_test

# Set style
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (10, 6)

ModuleNotFoundError: No module named 'pandas'

## 1. Data Generation

In [2]:
# Generate synthetic data
os.system('python ../data/generate_data.py')

# Load data
orders_df = pd.read_csv('../data/orders.csv')
users_df = pd.read_csv('../data/users.csv')
restaurants_df = pd.read_csv('../data/restaurants.csv')
items_df = pd.read_csv('../data/items.csv')

print(f"Orders shape: {orders_df.shape}")
print(f"Users shape: {users_df.shape}")
print(f"Restaurants shape: {restaurants_df.shape}")
print(f"Items shape: {items_df.shape}")

NameError: name 'pd' is not defined

In [3]:
# Data exploration
orders_df.head()

NameError: name 'orders_df' is not defined

In [4]:
# Target distribution
sns.countplot(x='addon_accepted', data=orders_df)
plt.title('Add-on Acceptance Distribution')
plt.show()

NameError: name 'sns' is not defined

## 2. Feature Engineering

In [None]:
# Create features
orders_df = create_features(orders_df)
orders_df.head()

## 3. Model Training and Evaluation

In [None]:
# Train and evaluate models
results, lr_model, gb_model = train_and_evaluate(orders_df)

print("Model Performance:")
for model_name, metrics in results.items():
    print(f"{model_name}: AUC={metrics['auc']:.3f}, Precision={metrics['precision']:.3f}, Recall={metrics['recall']:.3f}, Precision@5={metrics['precision_at_5']:.3f}")

## 4. Cold Start Handling

In [None]:
# Example cold start
user_history = orders_df['user_id'].unique()
cold_start_prob = handle_cold_start(9999, 201, 'New York', 'Italian', 12, 'Premium', 'Main', orders_df, user_history)
print(f"Cold start probability: {cold_start_prob}")

## 5. Business Impact Simulation

In [None]:
# Simulate impact
baseline_auc = results['baseline']['auc']
model_auc = results['gradient_boosting']['auc']
impact = simulate_business_impact(baseline_auc, model_auc)

print("Business Impact:")
for key, value in impact.items():
    print(f"{key}: {value}")

## 6. A/B Testing Simulation

In [5]:
# Simulate A/B test
ab_results = simulate_ab_test(baseline_auc, model_auc)

print("A/B Test Results:")
for key, value in ab_results.items():
    print(f"{key}: {value}")

NameError: name 'simulate_ab_test' is not defined

## 7. Production Architecture Overview

### System Design:
- **Cart Event**: User adds item to cart â†’ triggers recommendation request.
- **Feature Store**: Retrieve real-time features (user history, restaurant data, time, cart context).
- **Model API**: Serve predictions from trained models.
- **Ranking Engine**: Rank add-on suggestions based on scores.
- **API Response**: Return top add-ons to app.

### Inference < 300ms:
- Use optimized models (e.g., ONNX for fast inference).
- Pre-compute features where possible.
- Async processing for non-critical parts.

### Scalability:
- Kubernetes for container orchestration.
- Load balancers.
- Horizontal scaling.

### Caching:
- Redis for user features, popular add-ons.
- TTL-based eviction.

### Retraining:
- Batch retraining weekly with new data.
- A/B test new models before deployment.