# Orderbook Optimization Workshop

## Goal
Start with a working but inefficient orderbook for asks, then optimize it together to achieve O(1) best ask lookups.

## API: 3 Core Functions
```python
submit(order_id, price, quantity)  # Add a sell order
cancel(order_id)                   # Cancel a sell order
get_best_price()                   # Get lowest ask price
```

## Target Complexities

| Operation | Current | Target |
|-----------|---------|--------|
| submit() | O(N) | O(log P) |
| cancel() | O(N) | O(P) |
| get_best_price() | O(N) | O(1) |

Where:
- N = total orders
- P = number of unique price levels

## Setup

In [None]:
import heapq
from enum import Enum
from typing import Optional

class OrderStatus(Enum):
    """Lifecycle state of an order; each member's value is its display string."""
    OPEN = "OPEN"            # assigned to every newly constructed Order
    CANCELLED = "CANCELLED"  # assigned when an orderbook cancels the order

class Order:
    """A single sell order: an ID, a price, a quantity and a status."""

    def __init__(self, order_id: str, price: float, quantity: int):
        """Store the order's identity and terms; every new order starts OPEN."""
        self.order_id, self.price, self.quantity = order_id, price, quantity
        self.status = OrderStatus.OPEN

    def __repr__(self):
        """Return a compact one-line description of the order."""
        state = self.status.value
        return f"Order({self.order_id}, ${self.price}, qty={self.quantity}, {state})"

# Confirm that the setup cell (imports, OrderStatus, Order) ran to completion.
print("Setup complete.")

## Suboptimal Orderbook (Asks Only)

### Current Data Structure
```python
orders = [order1, order2, order3, ...]  # Flat list
```

### Performance Issues
- get_best_price() scans entire list for min price: O(N)
- cancel() requires linear search: O(N)
- submit() must check for duplicates: O(N)

In [None]:
class SuboptimalOrderBook:
    """A working but inefficient orderbook for asks.

    Every order lives in one flat list, so each operation scans it. This is
    the baseline the workshop optimizes: the linear scans are intentional.
    Cancelled orders are never removed; they stay in the list and are only
    flagged through their status.
    """

    def __init__(self):
        self.orders = []  # every accepted Order, in submission order

    def submit(self, order_id: str, price: float, quantity: int) -> bool:
        """
        Submit a new sell order.

        Returns True if the order was added, or False if ``order_id`` is
        already present. Cancelled orders still count as present because
        they remain in the list.

        Current: O(N) - scan all orders to check for duplicate ID
        Target: O(log P)
        """
        # Check for duplicate
        for order in self.orders:
            if order.order_id == order_id:
                return False

        # Create and add order
        self.orders.append(Order(order_id, price, quantity))
        return True

    def cancel(self, order_id: str) -> bool:
        """
        Cancel a sell order by ID.

        Returns True if an open order was found and marked CANCELLED, or
        False if the ID is unknown or the order was already cancelled.

        Current: O(N) - linear search
        Target: O(P)
        """
        for order in self.orders:
            if order.order_id == order_id:
                if order.status == OrderStatus.CANCELLED:
                    return False
                order.status = OrderStatus.CANCELLED
                return True
        return False

    def get_best_price(self) -> Optional[float]:
        """
        Get the lowest ask price.

        Returns the minimum price among OPEN orders, or None when there
        are no open orders.

        Current: O(N) - scan all orders to find min
        Target: O(1)
        """
        best_price = None

        for order in self.orders:
            if order.status == OrderStatus.OPEN:
                if best_price is None or order.price < best_price:
                    best_price = order.price

        return best_price

    def __repr__(self):
        """Display the orderbook: open quantity per price level, best ask, order count."""
        lines = ["=" * 60, "ASK ORDERBOOK (Suboptimal)", "=" * 60]

        # Group by price (open orders only)
        prices = {}
        for order in self.orders:
            if order.status == OrderStatus.OPEN:
                prices[order.price] = prices.get(order.price, 0) + order.quantity

        # Show asks (low to high)
        for price in sorted(prices):
            lines.append(f"ASK: ${price:>8.2f} | Qty: {prices[price]}")

        lines.append("=" * 60)
        best = self.get_best_price()
        # Compare against None explicitly: a best price of 0.0 is falsy but
        # still a real price that must be shown.
        if best is not None:
            lines.append(f"Best Ask (Lowest): ${best:.2f}")
        # Counts every stored order, including cancelled ones.
        lines.append(f"Total orders: {len(self.orders)}")

        return "\n".join(lines)

# Confirm that the SuboptimalOrderBook class definition was executed.
print("SuboptimalOrderBook created.")

## Test Functions

In [None]:
def test_orderbook(orderbook_class, name="OrderBook"):
    """Walk an orderbook class through submit, display, cancel and duplicate submit.

    Results are printed rather than asserted, so the output is meant to be
    read and compared against the expectations shown in the messages.
    """
    print(f"\n{'='*50}", f"Testing: {name}", f"{'='*50}\n", sep="\n")

    # Seed a fresh book with three asks at ascending prices.
    seed_orders = (
        ("A1", 101.0, 50),
        ("A2", 101.5, 30),
        ("A3", 102.0, 20),
    )
    ob = orderbook_class()
    for order_id, price, quantity in seed_orders:
        ob.submit(order_id, price, quantity)

    print(ob)
    print(f"\nBest ask: ${ob.get_best_price()}")

    # A1 is the lowest-priced order, so cancelling it should change the best ask.
    print("\nCancelling A1...")
    ob.cancel("A1")
    print(f"New best ask: ${ob.get_best_price()}")

    # Re-submitting an existing ID is expected to be rejected.
    result = ob.submit("A2", 99.0, 100)
    print(f"Duplicate order: {result} (expected: False)\n")

In [None]:
def test_orderbook_latency(orderbook_class, name="OrderBook",
                           sizes=(10, 50, 100, 500, 1000), calls=1000):
    """Measure get_best_price() performance.

    For each entry in ``sizes``, a fresh book is created, that many orders
    are submitted at ascending prices, and the mean time per
    ``get_best_price()`` call over ``calls`` invocations is printed.

    Args:
        orderbook_class: Zero-argument callable returning an object with
            ``submit(order_id, price, quantity)`` and ``get_best_price()``.
        name: Label printed in the report header.
        sizes: Order counts to benchmark; one output row per entry.
        calls: Number of timed ``get_best_price()`` calls per row (>= 1).

    Raises:
        ValueError: If ``calls`` is less than 1.
    """
    import time

    if calls < 1:
        raise ValueError("calls must be at least 1")

    print(f"\n{'='*50}")
    print(f"Latency Test: {name}")
    print(f"{'='*50}\n")

    for n in sizes:
        # Create orderbook with n orders
        ob = orderbook_class()
        for i in range(n):
            ob.submit(f"A{i}", 100.0 + i * 0.01, 10)

        # Time the get_best_price() calls. perf_counter() is the clock meant
        # for short durations; time.time() can be too coarse to resolve
        # sub-microsecond calls and may jump if the system clock is adjusted.
        start = time.perf_counter()
        for _ in range(calls):
            ob.get_best_price()
        elapsed = time.perf_counter() - start

        avg_us = elapsed / calls * 1e6
        # \u00b5 is the micro sign, written as an escape so that it survives
        # any re-encoding of this source file.
        print(f"{n:4d} orders: {avg_us:6.2f} \u00b5s/call")

    print()

## Test Suboptimal Implementation

In [None]:
# Run the functional walkthrough, then the get_best_price() latency benchmark,
# against the baseline flat-list implementation.
test_orderbook(SuboptimalOrderBook, "SuboptimalOrderBook")
test_orderbook_latency(SuboptimalOrderBook, "SuboptimalOrderBook")

## Discussion: Optimization Strategies

### Problem 1: get_best_price() is O(N)
Currently scanning all N orders to find the minimum price.

**Solution: Min Heap**
- Python's heapq is a min heap by default
- Store prices: [101, 102, 103]
- heap[0] always gives the lowest in O(1)
- No negation needed: asks want the minimum, which heapq provides directly (bids want the maximum, so a bid heap would store negated prices)

### Problem 2: cancel() is O(N)
Linear search to find order by ID.

**Solution: Dictionary**
```python
orders = {order_id: Order}
order = orders["A1"]  # O(1) lookup
```

### Problem 3: submit() is O(N)
Must scan to check for duplicate ID.

**Solution: Dictionary + Heap**
- Check if order_id in orders: O(1)
- Push price to heap (only when the price level is new, so the heap holds at most P entries): O(log P), where P = price levels
- Total: O(log P)

## Min Heap Overview

### Python's heapq Module
```python
import heapq
heap = []
heapq.heappush(heap, 5)
heapq.heappush(heap, 3)
heapq.heappush(heap, 8)
# heap[0] = 3 (smallest)
```

### Application to Ask Prices
```python
# Ask prices: $101, $102, $103
ask_heap = []
heapq.heappush(ask_heap, 101.0)
heapq.heappush(ask_heap, 102.0)
heapq.heappush(ask_heap, 103.0)

# To get best ask (lowest):
best_ask = ask_heap[0]  # $101
```

### Visual Representation
```
Prices: [101, 102, 103]
         |
         v
Push to heap: [101, 102, 103]
         |
         v
heap[0] = 101  (Lowest ask)
```

Result: O(1) access to lowest ask price.

## Implementation Plan

### Target Data Structure
```python
class OptimizedOrderBook:
    def __init__(self):
        self.asks = {}         # {price: [orders at that price]}
        self.ask_heap = []     # [101, 102, 103]
        self.orders = {}       # {order_id: Order}
```

### Complexity Analysis

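1. **submit()**:
   - Check dict for duplicate: O(1)
   - Add to orders dict: O(1)
   - Append to the price level's list in asks: O(1)
   - Push price to heap (only when the price level is new, so the heap holds at most P entries): O(log P)
   - Total: O(log P)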

2. **cancel()**:
   - Dict lookup: O(1)
   - Mark as cancelled: O(1)
   - Check if price level is empty: O(orders at price)
   - Rebuild heap if needed: O(P)
   - Total: O(P)

3. **get_best_price()**:
   - Peek at heap top: O(1)
   - Total: O(1)

### Expected Results
- Performance improvement: 50-100x faster
- get_best_price(): From 50+ microseconds to <1 microsecond

## Optimized Implementation Skeleton

In [None]:
class OptimizedOrderBook:
    """Optimized orderbook for asks using heap and dict.

    Workshop skeleton: submit(), cancel() and get_best_price() are
    placeholders to be implemented. Until then each of them returns None.
    """

    def __init__(self):
        # heapq is imported at module level rather than here: an import inside
        # __init__ is only a local name and would not be visible to submit()
        # or cancel() when they call heapq.heappush / heapq.heappop.
        self.orders = {}           # {order_id: Order} - O(1) lookup by ID
        self.asks = {}             # {price: [orders at that price]}
        self.ask_heap = []         # Min heap of prices

    def submit(self, order_id: str, price: float, quantity: int) -> bool:
        """
        Submit a new sell order.

        Should return True when the order is accepted and False when
        order_id already exists.
        Hint: Use heapq.heappush(heap, price)
        Target: O(log P)
        """
        pass  # placeholder: to be implemented during the workshop

    def cancel(self, order_id: str) -> bool:
        """
        Cancel a sell order by ID.

        Should return True when an open order was cancelled and False when
        the ID is unknown or the order was already cancelled.
        Target: O(P)
        """
        pass  # placeholder: to be implemented during the workshop

    def get_best_price(self) -> Optional[float]:
        """
        Get the lowest ask price.

        Should return None when there are no open orders.
        Target: O(1)
        """
        pass  # placeholder: to be implemented during the workshop

    def __repr__(self):
        """Display the orderbook: open quantity per price level, best ask, open-order count."""
        lines = ["=" * 60, "ASK ORDERBOOK (Optimized)", "=" * 60]

        # One pass over the orders: total the open quantity per price level
        # and count the open orders.
        prices = {}
        open_count = 0
        for order in self.orders.values():
            if order.status == OrderStatus.OPEN:
                prices[order.price] = prices.get(order.price, 0) + order.quantity
                open_count += 1

        # Show asks (low to high)
        for price in sorted(prices):
            lines.append(f"ASK: ${price:>8.2f} | Qty: {prices[price]}")

        lines.append("=" * 60)
        best = self.get_best_price()
        # Compare against None explicitly: a best price of 0.0 is falsy but
        # still a real price that must be shown.
        if best is not None:
            lines.append(f"Best Ask (Lowest): ${best:.2f}")
        lines.append(f"Total orders: {open_count}")

        return "\n".join(lines)

# Confirm that the OptimizedOrderBook skeleton definition was executed.
print("OptimizedOrderBook skeleton created.")

## Test Optimized Implementation

In [None]:
# Run the same walkthrough and latency benchmark against the optimized class.
# While its submit/cancel/get_best_price methods are still placeholders that
# return None, prices print as "$None" and the duplicate check prints None
# instead of False.
test_orderbook(OptimizedOrderBook, "OptimizedOrderBook")
test_orderbook_latency(OptimizedOrderBook, "OptimizedOrderBook")