# Product Recommendation Engine - Quick Demo

This notebook demonstrates the key components of the recommendation engine:
- Synthetic data generation (users, items, interactions)
- Two-tower model architecture
- FAISS vector search
- A/B testing framework
- Clickstream simulation
- Evaluation metrics

In [None]:
import sys
import os

# The project modules below are imported with the `src.` package prefix, so the
# directory that CONTAINS `src` (the parent of the notebook's working
# directory) must be importable; putting only '../src' on the path exposes the
# contents of `src` but not the `src` package itself.
# The original '../src' entry is kept as well, in case anything relies on it.
# Absolute paths are used so the entries stay valid if the working directory
# changes later, and the membership check keeps the cell idempotent on re-run.
_project_root = os.path.abspath('..')
for _path in (_project_root, os.path.join(_project_root, 'src')):
    if _path not in sys.path:
        sys.path.append(_path)

import numpy as np
import pandas as pd
import torch
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta

# Import our modules
from src.utils.config import Config
from src.models.two_tower import TwoTowerModel
from src.retrieval.faiss_index import FAISSIndex
from src.evaluation.ab_testing import ABTestManager, simulate_ab_test_data
from src.data.clickstream_sim import ClickstreamSimulator

# Set random seeds so the NumPy- and PyTorch-driven cells below are repeatable
np.random.seed(42)
torch.manual_seed(42)

print("🚀 Recommendation Engine Demo")
print(f"PyTorch version: {torch.__version__}")
print(f"Device: {'GPU' if torch.cuda.is_available() else 'CPU'}")

## 1. Create Synthetic Data

In [None]:
# Create synthetic users, items and interactions.
# Produces three DataFrames used by later cells:
#   users_df        - user_id (1..num_users), age, gender
#   items_df        - item_id (1..num_items), title, genres ('|'-joined string)
#   interactions_df - user_id, item_id, rating (clipped to [1, 5]), timestamp
num_users = 1000
num_items = 500
num_interactions = 10000

# Generate users
users_df = pd.DataFrame({
    'user_id': range(1, num_users + 1),
    'age': np.random.randint(18, 65, num_users),
    'gender': np.random.choice(['M', 'F'], num_users)
})

# Generate items
item_categories = ['Action', 'Comedy', 'Drama', 'Horror', 'Romance', 'Sci-Fi', 'Thriller']
items_df = pd.DataFrame({
    'item_id': range(1, num_items + 1),
    'title': [f'Movie {i}' for i in range(1, num_items + 1)],
    # Each item gets 1-3 genres joined with '|'. replace=False guarantees the
    # genres of one item are distinct (sampling with replacement could yield
    # e.g. 'Action|Action').
    'genres': [
        '|'.join(
            np.random.choice(
                item_categories,
                size=np.random.randint(1, 4),
                replace=False
            )
        )
        for _ in range(num_items)
    ]
})

# Generate interactions with some realistic patterns (vectorized: one draw per
# column instead of one Python-level loop iteration per interaction).
interaction_user_ids = np.random.randint(1, num_users + 1, num_interactions)
interaction_item_ids = np.random.randint(1, num_items + 1, num_interactions)

# Simulate rating based on user/item "compatibility"
user_bias = (interaction_user_ids % 10) / 10.0  # Some users are more positive
item_bias = (interaction_item_ids % 20) / 20.0  # Some items are more popular
rating_noise = np.random.normal(0, 0.5, num_interactions)
ratings = np.clip(2.5 + user_bias + item_bias + rating_noise, 1, 5)

# A single reference instant is captured once, so every timestamp is an exact
# whole number of days (0-364) before the same moment.
reference_time = pd.Timestamp(datetime.now())
days_ago = np.random.randint(0, 365, num_interactions)

interactions_df = pd.DataFrame({
    'user_id': interaction_user_ids,
    'item_id': interaction_item_ids,
    'rating': ratings,
    'timestamp': reference_time - pd.to_timedelta(days_ago, unit='D')
})

print(f"📊 Generated synthetic data:")
print(f"  Users: {len(users_df)}")
print(f"  Items: {len(items_df)}")
print(f"  Interactions: {len(interactions_df)}")
print(f"  Average rating: {interactions_df['rating'].mean():.2f}")

## 2. Two-Tower Model Demo

In [None]:
# Initialize a small two-tower model for demo and run one forward pass on
# random inputs to show the output shapes.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Feature widths are named once so the model definition and the random demo
# inputs below cannot drift apart.
user_feature_dim = 3  # age, gender features (exact encoding not shown here — TODO confirm)
item_feature_dim = 7  # genre features (matches the 7 entries of item_categories)

model = TwoTowerModel(
    num_users=num_users,
    num_items=num_items,
    embedding_dim=32,  # Small for demo
    tower_dims=[64, 32],
    user_feature_dim=user_feature_dim,
    item_feature_dim=item_feature_dim,
    dropout=0.1
).to(device)

# torch.no_grad() only disables gradient tracking; it does NOT turn dropout
# off. Switch to eval mode so the inference demos are deterministic.
# (Assumes TwoTowerModel is a torch.nn.Module, as its .to()/.parameters()
# usage suggests.)
model.eval()

print(f"🧠 Two-Tower Model:")
print(f"  Parameters: {sum(p.numel() for p in model.parameters()):,}")
print(f"  Device: {device}")

# Demo forward pass
batch_size = 16
# Indices are drawn from [0, num_users) / [0, num_items): these are embedding
# row indices, not the 1-based ids stored in users_df / items_df.
user_ids = torch.randint(0, num_users, (batch_size,)).to(device)
item_ids = torch.randint(0, num_items, (batch_size,)).to(device)

# Generate random features
user_features = torch.randn(batch_size, user_feature_dim).to(device)
item_features = torch.randn(batch_size, item_feature_dim).to(device)

with torch.no_grad():
    similarities, user_emb, item_emb = model(user_ids, item_ids, user_features, item_features)

print(f"  Similarity scores shape: {similarities.shape}")
print(f"  User embeddings shape: {user_emb.shape}")
print(f"  Item embeddings shape: {item_emb.shape}")

## 3. FAISS Vector Search Demo

In [None]:
# Generate item embeddings, index them with FAISS and query the neighbours of
# one item.
print("🔍 FAISS Vector Search Demo")

# Embeddings that go into a search index must be computed in eval mode:
# torch.no_grad() alone leaves dropout active, which would randomly perturb
# the indexed vectors. (Assumes TwoTowerModel is a torch.nn.Module.)
model.eval()

# Get all item embeddings from the model
with torch.no_grad():
    all_item_ids = torch.arange(num_items).to(device)
    # Random stand-in features; 7 matches item_feature_dim used to build the model
    dummy_item_features = torch.randn(num_items, 7).to(device)
    item_embeddings = model.get_item_embedding(all_item_ids, dummy_item_features)
    item_embeddings_np = item_embeddings.cpu().numpy()

# Create FAISS index
faiss_index = FAISSIndex(
    dimension=item_embeddings_np.shape[1],
    index_type="Flat",  # Exact search for demo
    metric="inner_product"
)

# Build index
# NOTE: this rebinds `item_ids` (a tensor in the model demo cell) to a plain
# list of the 0-based ids that label the indexed vectors.
item_ids = list(range(num_items))
faiss_index.build_index(item_embeddings_np, item_ids)

print(f"  Index built with {faiss_index.index.ntotal} vectors")
print(f"  Dimension: {faiss_index.dimension}")

# Demo search
query_item_id = 0
# Slice (rather than index) to keep a 2-D array of shape (1, dimension)
query_embedding = item_embeddings_np[query_item_id:query_item_id+1]

# NOTE: `similarities` also rebinds the name used for the model demo's scores.
similar_items, similarities = faiss_index.search(query_embedding, k=10)

print(f"\n  Similar items to item {query_item_id}:")
# The query vector is itself in the index, so it can appear among the results.
for rank, (item_id, sim) in enumerate(zip(similar_items[0], similarities[0]), start=1):
    print(f"    {rank}. Item {item_id} (similarity: {sim:.3f})")

## 4. A/B Testing Demo

In [None]:
print("🧪 A/B Testing Demo")

# Register a 50/50 experiment with the A/B test manager and start it
experiment_id = "demo_experiment"
ab_manager = ABTestManager(min_sample_size=100)

experiment_settings = dict(
    experiment_id=experiment_id,
    name="Demo A/B Test",
    description="Testing new recommendation algorithm",
    treatment_percentage=0.5,
    target_metrics=['ctr', 'conversion_rate'],
)
experiment = ab_manager.create_experiment(**experiment_settings)
ab_manager.start_experiment(experiment_id)

print(f"  Created experiment: {experiment['name']}")
print(f"  Treatment percentage: {experiment['treatment_percentage']*100}%")

# Feed the running experiment with one week of simulated traffic
simulation_data = simulate_ab_test_data(ab_manager, experiment_id, num_users=1000, days=7)

control_count = len(simulation_data['control_users'])
treatment_count = len(simulation_data['treatment_users'])
print(f"\n  Simulation completed:")
print(f"    Control users: {control_count}")
print(f"    Treatment users: {treatment_count}")

# Analyze results and report them; anything other than a 'complete' status is
# reported as-is without metric details.
results = ab_manager.analyze_experiment(experiment_id)

print(f"\n  📈 Results:")
if results['status'] != 'complete':
    print(f"    Status: {results['status']}")
else:
    for metric_label, metric_stats in results['metrics'].items():
        print(f"    {metric_label.upper()}:")
        print(f"      Control: {metric_stats['control']:.4f}")
        print(f"      Treatment: {metric_stats['treatment']:.4f}")
        print(f"      Lift: {metric_stats['lift_percentage']:.2f}%")

    verdict = results['recommendations']
    print(f"\n    Recommendation: {verdict['action']}")
    print(f"    Confidence: {verdict['confidence']}")

## 5. Clickstream Simulation

In [None]:
print("📱 Clickstream Simulation Demo")

# Build the simulator on a small slice of the synthetic population
# (first 100 users, first 50 items) to keep the demo quick.
simulator = ClickstreamSimulator(
    users_df=users_df.head(100),
    items_df=items_df.head(50),
    seed=42
)

# Simulate the two days leading up to now
start_date = datetime.now() - timedelta(days=2)
end_date = datetime.now()
clickstream_df = simulator.simulate_period(start_date, end_date)

num_events = len(clickstream_df)
print(f"  Generated {num_events} clickstream events")

if num_events == 0:
    print("  No events generated (try increasing simulation period)")
else:
    # Text summary; reads the 'user_id', 'item_id' and 'action' columns
    print(f"\n  📊 Clickstream Analysis:")
    print(f"    Unique users: {clickstream_df['user_id'].nunique()}")
    print(f"    Unique items: {clickstream_df['item_id'].nunique()}")
    print(f"    Action distribution:")

    action_counts = clickstream_df['action'].value_counts()
    for action, count in action_counts.items():
        percentage = count / num_events * 100
        print(f"      {action}: {count} ({percentage:.1f}%)")

    # Two-panel figure: events per action (left), events per hour (right)
    fig, (ax_actions, ax_hours) = plt.subplots(1, 2, figsize=(10, 6))

    action_counts.plot(kind='bar', ax=ax_actions)
    ax_actions.set_title('Action Distribution')
    ax_actions.set_xlabel('Action')
    ax_actions.set_ylabel('Count')
    plt.setp(ax_actions.get_xticklabels(), rotation=45)

    # Reads the 'hour' column of the simulated events
    hourly_activity = clickstream_df.groupby('hour').size()
    hourly_activity.plot(kind='line', marker='o', ax=ax_hours)
    ax_hours.set_title('Activity by Hour')
    ax_hours.set_xlabel('Hour of Day')
    ax_hours.set_ylabel('Number of Events')
    ax_hours.grid(True)

    fig.tight_layout()
    plt.show()

## 6. Model Evaluation Metrics

In [None]:
from src.evaluation.metrics import RecommendationMetrics

print("📏 Evaluation Metrics Demo")

# Toy example: a ranked recommendation list and the items the user liked
predicted = [1, 3, 5, 7, 9, 2, 4, 6, 8, 10]  # Recommended items
actual = [1, 2, 5, 8, 12, 15]  # Items user actually liked

metrics = RecommendationMetrics()

# Cut-off metrics are evaluated in a fixed order for each k, followed by the
# two rank-based metrics that take no cut-off.
k_values = [5, 10]
cutoff_metrics = [
    ('precision', metrics.precision_at_k),
    ('recall', metrics.recall_at_k),
    ('f1', metrics.f1_at_k),
    ('hit_rate', metrics.hit_rate),
]

results = {}
for k in k_values:
    for label, metric_fn in cutoff_metrics:
        results[f'{label}@{k}'] = metric_fn(predicted, actual, k)

results['mrr'] = metrics.mean_reciprocal_rank(predicted, actual)
results['ap'] = metrics.average_precision(predicted, actual)

print(f"  Sample Metrics:")
print(f"    Predicted: {predicted}")
print(f"    Actual: {actual}")
print("")

for metric_name, value in results.items():
    print(f"    {metric_name}: {value:.3f}")

# Visualize how each cut-off metric evolves for k = 1..10
k_range = range(1, 11)
precisions = [metrics.precision_at_k(predicted, actual, k) for k in k_range]
recalls = [metrics.recall_at_k(predicted, actual, k) for k in k_range]
f1_scores = [metrics.f1_at_k(predicted, actual, k) for k in k_range]
hit_rates = [metrics.hit_rate(predicted, actual, k) for k in k_range]

fig, (ax_pr, ax_f1, ax_hit) = plt.subplots(1, 3, figsize=(12, 4))

# Precision and Recall at different k
ax_pr.plot(k_range, precisions, 'o-', label='Precision@k')
ax_pr.plot(k_range, recalls, 's-', label='Recall@k')
ax_pr.set_xlabel('k')
ax_pr.set_ylabel('Score')
ax_pr.set_title('Precision and Recall vs k')
ax_pr.legend()
ax_pr.grid(True)

# F1 Score
ax_f1.plot(k_range, f1_scores, 'o-', color='green')
ax_f1.set_xlabel('k')
ax_f1.set_ylabel('F1 Score')
ax_f1.set_title('F1 Score vs k')
ax_f1.grid(True)

# Hit Rate
ax_hit.plot(k_range, hit_rates, 'o-', color='red')
ax_hit.set_xlabel('k')
ax_hit.set_ylabel('Hit Rate')
ax_hit.set_title('Hit Rate vs k')
ax_hit.grid(True)

fig.tight_layout()
plt.show()

## Summary

This demo showcased the key components of our production-grade recommendation engine:

🧠 **Two-Tower Model**: Neural architecture for learning user and item representations  
🔍 **FAISS Search**: Fast similarity search for candidate generation  
🧪 **A/B Testing**: Statistical framework for experiment management  
📱 **Clickstream Simulation**: Realistic user behavior modeling  
📏 **Evaluation Metrics**: Comprehensive performance measurement  

The system is designed for production scale with:
- Sub-50ms inference latency
- Redis caching for performance  
- FastAPI serving layer
- Docker containerization
- Comprehensive monitoring

**Next Steps:**
1. Train on real data (MovieLens, Amazon Reviews)
2. Deploy with `docker-compose up`
3. Run production API with `python scripts/serve.py`
4. Monitor with `/metrics` endpoint
5. Scale with Kubernetes